1 Prepare

1.1 Setup

1.2 Install and load packages

# Install any package that is not yet available, without attaching it.
# Every package is attached explicitly by the library() calls that follow, so
# the check uses requireNamespace() throughout; require() would also attach
# the package as a side effect and make the attach order harder to follow.
# rmarkdown and gridExtra are included because they are attached below as well.
packages_default_deps <- c(
  "rvest", "knitr", "httr", "readr", "tidyverse", "Hmisc", "details",
  "rmarkdown", "gridExtra"
)
# These packages are installed together with all of their optional
# dependencies (dependencies = TRUE).
packages_all_deps <- c("paletteer", "janitor", "patchwork")

for (pkg in packages_default_deps) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

for (pkg in packages_all_deps) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, dependencies = TRUE)
  }
}
# Attach the packages used in this analysis.
# The order of the library() calls matters: a package attached later masks
# same-named functions of packages attached earlier, as reported in the
# start-up messages echoed (prefixed with "##") after each call.
library(httr)
library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(janitor)
# tidyverse attaches the remaining core packages listed in its start-up
# message below (forcats, ggplot2, lubridate, purrr, stringr, tibble, tidyr).
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(knitr)
# ggplot2 already appears in the tidyverse start-up message above.
library(ggplot2)
library(patchwork)
# NOTE: Hmisc masks dplyr::src and dplyr::summarize (see message below);
# write dplyr::summarize() explicitly wherever the dplyr version is intended.
library(Hmisc) # Provides describe(), used to summarise the data
## 
## Attaching package: 'Hmisc'
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(details)
library(rvest)
## 
## Attaching package: 'rvest'
## 
## The following object is masked from 'package:readr':
## 
##     guess_encoding
library(rmarkdown)
# NOTE: rlang masks several purrr helpers (flatten*, invoke, splice, %@%),
# as reported in the message below.
library(rlang)
## 
## Attaching package: 'rlang'
## 
## The following objects are masked from 'package:purrr':
## 
##     %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
##     flatten_raw, invoke, splice
# purrr already appears in the tidyverse start-up message above; presumably
# this call does not undo the rlang masking reported above (TODO confirm).
library(purrr)
# NOTE: gridExtra masks dplyr::combine (see message below).
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
library(grid)
library(paletteer)

2 Task 1

2.1 Load and preview the dataset

# Read the data from the CSV file.
# The copy inside the project (path relative to the working directory) is
# preferred so the analysis can run on any machine; the original absolute
# path is kept as a fallback so the author's existing setup still works.
zigong_candidates <- c(
  file.path("zigong", "dat.csv"),
  "C:/Users/maggi/OneDrive/Documents/GitHub/individual-marked-assessment-2-Maggie1910/zigong/dat.csv"
)
# Take the first candidate that exists; NA when none of them does.
zigong_path <- zigong_candidates[file.exists(zigong_candidates)][1]
if (is.na(zigong_path)) {
  stop(
    "Could not find the dataset; tried: ",
    paste(zigong_candidates, collapse = ", "),
    call. = FALSE
  )
}
zigong <- readr::read_csv(zigong_path)
## New names:
## Rows: 2008 Columns: 167
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (15): DestinationDischarge, admission.ward, admission.way, occupation, ... dbl
## (151): ...1, inpatient.number, visit.times, body.temperature, pulse, res... lgl
## (1): cholinesterase
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Preview the first six rows of the imported data
zigong %>% head(n = 6L)

2.2 Describe raw data

# Build a per-variable summary of the raw data with Hmisc::describe().
# The data frame is piped in, so the summary header is labelled ".".
summarise_raw_data <- zigong %>% Hmisc::describe()

# Show the summary via details::details(), using the given summary label.
details::details(summarise_raw_data, summary = "Click to show result!")
Click to show result!

. 

 167  Variables      2008  Observations
--------------------------------------------------------------------------------
...1 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    2008        0     2008        1     1004    669.7    101.4    201.7 
     .25      .50      .75      .90      .95 
   502.8   1004.5   1506.2   1807.3   1907.6 

lowest :    1    2    3    4    5, highest: 2004 2005 2006 2007 2008
--------------------------------------------------------------------------------
inpatient.number 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    2008        0     2008        1   797748    47374   733337   739576 
     .25      .50      .75      .90      .95 
  763165   798758   829400   858540   864183 

lowest : 722128 723327 723617 724385 725509, highest: 870258 870646 879601 905163 905720
--------------------------------------------------------------------------------
DestinationDischarge 
       n  missing distinct 
    2008        0        4 
                                                                   
Value                    Died HealthcareFacility               Home
Frequency                  14                438               1344
Proportion              0.007              0.218              0.669
                             
Value                 Unknown
Frequency                 212
Proportion              0.106
--------------------------------------------------------------------------------
admission.ward 
       n  missing distinct 
    2008        0        4 
                                                          
Value       Cardiology GeneralWard         ICU      Others
Frequency         1547         265          15         181
Proportion       0.770       0.132       0.007       0.090
--------------------------------------------------------------------------------
admission.way 
       n  missing distinct 
    2008        0        2 
                                    
Value         Emergency NonEmergency
Frequency           956         1052
Proportion        0.476        0.524
--------------------------------------------------------------------------------
occupation 
       n  missing distinct 
    1981       27        5 
                                                                  
Value             farmer       Officer        Others UrbanResident
Frequency            198             7            89          1670
Proportion         0.100         0.004         0.045         0.843
                        
Value             worker
Frequency             17
Proportion         0.009
--------------------------------------------------------------------------------
discharge.department 
       n  missing distinct 
    2008        0        4 
                                                          
Value       Cardiology GeneralWard         ICU      Others
Frequency         1703         241          12          52
Proportion       0.848       0.120       0.006       0.026
--------------------------------------------------------------------------------
visit.times 
       n  missing distinct     Info     Mean      Gmd 
    2008        0        5    0.205    1.093   0.1741 
                                        
Value          1     2     3     4     5
Frequency   1860   120    20     6     2
Proportion 0.926 0.060 0.010 0.003 0.001

For the frequency table, variable is rounded to the nearest 0
--------------------------------------------------------------------------------
gender 
       n  missing distinct 
    2008        0        2 
                        
Value      Female   Male
Frequency    1163    845
Proportion  0.579  0.421
--------------------------------------------------------------------------------
body.temperature 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    2008        0       33    0.981    36.42   0.4083     36.0     36.0 
     .25      .50      .75      .90      .95 
    36.2     36.3     36.5     36.8     37.1 

lowest : 35   35.5 36   36.1 36.2, highest: 38.6 38.9 39   39.1 42  
--------------------------------------------------------------------------------
pulse 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    2008        0      123        1    85.24    23.76    55.35    60.00 
     .25      .50      .75      .90      .95 
   70.00    82.00    98.00   113.00   123.00 

lowest :   0  32  33  35  36, highest: 168 172 180 190 198
--------------------------------------------------------------------------------
respiration 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    2008        0       19    0.899    19.09    1.501       18       18 
     .25      .50      .75      .90      .95 
      18       19       19       21       22 
                                                                            
Value          0    15    16    17    18    19    20    21    22    23    24
Frequency      1     3     2    48   798   662   254   100    72    19    18
Proportion 0.000 0.001 0.001 0.024 0.397 0.330 0.126 0.050 0.036 0.009 0.009
                                                          
Value         25    26    27    28    29    30    32    36
Frequency     11     5     2     5     1     2     3     2
Proportion 0.005 0.002 0.001 0.002 0.000 0.001 0.001 0.001

For the frequency table, variable is rounded to the nearest 0
--------------------------------------------------------------------------------
systolic.blood.pressure 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    2008        0      122    0.998    131.1    27.51     94.0    100.0 
     .25      .50      .75      .90      .95 
   113.0    130.0    146.2    162.0    174.0 

lowest :   0  50  68  72  73, highest: 205 209 210 232 252
--------------------------------------------------------------------------------
diastolic.blood.pressure 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    2008        0       80    0.994    76.57    15.97       58       60 
     .25      .50      .75      .90      .95 
      65       76       85       96      100 

lowest :   0  30  33  40  46, highest: 120 123 125 130 146
--------------------------------------------------------------------------------
map 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    2008        0      219        1    94.73    18.04    71.33    75.23 
     .25      .50      .75      .90      .95 
   83.33    93.33   104.67   116.00   123.33 

lowest : 0       36.6667 44.6667 50.6667 60     
highest: 148.333 149.333 150     150.667 181.333
--------------------------------------------------------------------------------
weight 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    2008        0       86    0.997    52.48    11.84    40.00    40.00 
     .25      .50      .75      .90      .95 
   45.00    50.00    60.00    67.15    72.82 

lowest : 0    8    30   31   31.5, highest: 91   92   95   96   115 
--------------------------------------------------------------------------------
height 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    2008        0       51    0.991    1.568  0.09624     1.45     1.48 
     .25      .50      .75      .90      .95 
    1.50     1.56     1.62     1.70     1.70 

lowest : 0.35 0.48 0.6  1.2  1.25, highest: 1.77 1.78 1.79 1.8  1.83
--------------------------------------------------------------------------------
BMI 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    2008        0      661        1    21.79    5.309    16.22    16.99 
     .25      .50      .75      .90      .95 
   18.49    20.76    23.44    26.44    28.62 

lowest : 0       3.4626  13.0612 13.3195 13.3333
highest: 37.551  39.1111 138.889 212.674 404.082
--------------------------------------------------------------------------------
type.of.heart.failure 
       n  missing distinct 
    2008        0        3 
                            
Value       Both  Left Right
Frequency   1480   477    51
Proportion 0.737 0.238 0.025
--------------------------------------------------------------------------------
NYHA.cardiac.function.classification 
       n  missing distinct 
    2008        0        3 
                            
Value         II   III    IV
Frequency    353  1039   616
Proportion 0.176 0.517 0.307
--------------------------------------------------------------------------------
Killip.grade 
       n  missing distinct 
    2008        0        4 
                                  
Value          I    II   III    IV
Frequency    527  1029   392    60
Proportion 0.262 0.512 0.195 0.030
--------------------------------------------------------------------------------
myocardial.infarction 
       n  missing distinct     Info      Sum     Mean      Gmd 
    2008        0        2    0.198      143  0.07122   0.1324 

--------------------------------------------------------------------------------
congestive.heart.failure 
       n  missing distinct     Info      Sum     Mean      Gmd 
    2008        0        2    0.189     1872   0.9323   0.1263 

--------------------------------------------------------------------------------
peripheral.vascular.disease 
       n  missing distinct     Info      Sum     Mean      Gmd 
    2008        0        2    0.143      101   0.0503  0.09559 

--------------------------------------------------------------------------------
cerebrovascular.disease 
       n  missing distinct     Info      Sum     Mean      Gmd 
    2008        0        2    0.207      150   0.0747   0.1383 

--------------------------------------------------------------------------------
dementia 
       n  missing distinct     Info      Sum     Mean      Gmd 
    2008        0        2    0.162      115  0.05727    0.108 

--------------------------------------------------------------------------------
Chronic.obstructive.pulmonary.disease 
       n  missing distinct     Info      Sum     Mean      Gmd 
    2008        0        2    0.308      233    0.116   0.2052 

--------------------------------------------------------------------------------
connective.tissue.disease 
       n  missing distinct     Info      Sum     Mean      Gmd 
    2008        0        2    0.006        4 0.001992 0.003978 

--------------------------------------------------------------------------------
peptic.ulcer.disease 
       n  missing distinct     Info      Sum     Mean      Gmd 
    2006        2        2    0.066       45  0.02243  0.04388 

--------------------------------------------------------------------------------
diabetes 
       n  missing distinct     Info      Sum     Mean      Gmd 
    2008        0        2    0.535      466   0.2321   0.3566 

--------------------------------------------------------------------------------
moderate.to.severe.chronic.kidney.disease 
       n  missing distinct     Info      Sum     Mean      Gmd 
    2006        2        2    0.541      474   0.2363   0.3611 

--------------------------------------------------------------------------------
hemiplegia 
       n  missing distinct     Info      Sum     Mean      Gmd 
    2008        0        2    0.018       12 0.005976  0.01189 

--------------------------------------------------------------------------------
leukemia 
       n  missing distinct     Info     Mean      Gmd 
    2008        0        1        0        0        0 
               
Value         0
Frequency  2008
Proportion    1
--------------------------------------------------------------------------------
malignant.lymphoma 
       n  missing distinct     Info      Sum     Mean      Gmd 
    2008        0        2    0.001        1 0.000498 0.000996 

--------------------------------------------------------------------------------
solid.tumor 
       n  missing distinct     Info      Sum     Mean      Gmd 
    2008        0        2    0.057       39  0.01942  0.03811 

--------------------------------------------------------------------------------
liver.disease 
       n  missing distinct     Info      Sum     Mean      Gmd 
    2007        1        2     0.12       84  0.04185  0.08024 

--------------------------------------------------------------------------------
AIDS 
       n  missing distinct     Info      Sum     Mean      Gmd 
    2008        0        2    0.006        4 0.001992 0.003978 

--------------------------------------------------------------------------------
CCI.score 
       n  missing distinct     Info     Mean      Gmd 
    2003        5        7    0.894    1.862    1.024 
                                                    
Value          0     1     2     3     4     5     6
Frequency     56   770   699   368    94    15     1
Proportion 0.028 0.384 0.349 0.184 0.047 0.007 0.000

For the frequency table, variable is rounded to the nearest 0
--------------------------------------------------------------------------------
type.II.respiratory.failure 
       n  missing distinct 
    2008        0        2 
                              
Value      NonTypeII    TypeII
Frequency       1894       114
Proportion     0.943     0.057
--------------------------------------------------------------------------------
consciousness 
       n  missing distinct 
    2008        0        4 
                                                                
Value                  Clear     Nonresponsive  ResponsiveToPain
Frequency               1974                11                 4
Proportion             0.983             0.005             0.002
                            
Value      ResponsiveToSound
Frequency                 19
Proportion             0.009
--------------------------------------------------------------------------------
eye.opening 
       n  missing distinct     Info     Mean      Gmd 
    2008        0        4    0.061    3.964  0.07163 
                                  
Value          1     2     3     4
Frequency     14     3    25  1966
Proportion 0.007 0.001 0.012 0.979

For the frequency table, variable is rounded to the nearest 0
--------------------------------------------------------------------------------
verbal.response 
       n  missing distinct     Info     Mean      Gmd 
    2008        0        5    0.064     4.94   0.1175 
                                        
Value          1     2     3     4     5
Frequency     14     8    18     4  1964
Proportion 0.007 0.004 0.009 0.002 0.978

For the frequency table, variable is rounded to the nearest 0
--------------------------------------------------------------------------------
movement 
       n  missing distinct     Info     Mean      Gmd 
    2008        0        6    0.071    5.927   0.1431 
                                              
Value          1     2     3     4     5     6
Frequency     22     1     2     2    22  1959
Proportion 0.011 0.000 0.001 0.001 0.011 0.976

For the frequency table, variable is rounded to the nearest 0
--------------------------------------------------------------------------------
respiratory.support. 
       n  missing distinct 
    2008        0        3 
                            
Value        IMV  NIMV  None
Frequency     25    17  1966
Proportion 0.012 0.008 0.979
--------------------------------------------------------------------------------
oxygen.inhalation 
       n  missing distinct 
    2008        0        2 
                                      
Value         AmbientAir OxygenTherapy
Frequency            110          1898
Proportion         0.055         0.945
--------------------------------------------------------------------------------
fio2 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    2008        0       15    0.326    32.67    2.406       21       33 
     .25      .50      .75      .90      .95 
      33       33       33       33       33 
                                                                            
Value         21    29    33    35    40    41    45    50    53    54    55
Frequency    110    71  1760     1    22    26     3     2     1     2     1
Proportion 0.055 0.035 0.876 0.000 0.011 0.013 0.001 0.001 0.000 0.001 0.000
                                  
Value         60    61    80   100
Frequency      3     1     1     4
Proportion 0.001 0.000 0.000 0.002

For the frequency table, variable is rounded to the nearest 0
--------------------------------------------------------------------------------
acute.renal.failure 
       n  missing distinct     Info      Sum     Mean      Gmd 
    2008        0        2     0.01        7 0.003486 0.006951 

--------------------------------------------------------------------------------
LVEF 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     635     1373       66    0.999    50.68    15.09     28.0     33.0 
     .25      .50      .75      .90      .95 
    41.0     51.0     61.0     68.0     70.3 

lowest :  5 16 17 19 20, highest: 76 78 79 81 82
--------------------------------------------------------------------------------
left.ventricular.end.diastolic.diameter.LV 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1311      697       63    0.999    53.11    12.19       37       40 
     .25      .50      .75      .90      .95 
      45       53       60       68       73 

lowest : 0.3  0.93 22   28   29  , highest: 83   85   86   87   88  
--------------------------------------------------------------------------------
mitral.valve.EMS 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     980     1028      199        1     4.85    7.987   0.4600   0.5400 
     .25      .50      .75      .90      .95 
  0.7675   1.0100   1.2800   1.6530   2.1000 
                                                                            
Value        0.0   0.5   1.0   1.5   2.0   2.5   3.0   3.5  36.0  44.0 401.0
Frequency     65   399   389    75    27     9     3     2     1     1     2
Proportion 0.066 0.407 0.397 0.077 0.028 0.009 0.003 0.002 0.001 0.001 0.002
                                              
Value      403.0 405.0 406.0 407.0 408.0 409.0
Frequency      1     2     1     1     1     1
Proportion 0.001 0.002 0.001 0.001 0.001 0.001

For the frequency table, variable is rounded to the nearest 0.5
--------------------------------------------------------------------------------
mitral.valve.AMS 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     550     1458      133        1    4.087    6.806   0.3645   0.4300 
     .25      .50      .75      .90      .95 
  0.5900   0.8300   1.0500   1.2400   1.3765 

lowest : 0.06 0.1  0.12 0.25 0.27, highest: 40   99   401  406  408 
--------------------------------------------------------------------------------
EA 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     393     1615      309        1     1.29   0.9238   0.4492   0.5150 
     .25      .50      .75      .90      .95 
  0.6560   0.9800   1.6060   2.3240   2.7958 

lowest : 0.0616 0.073  0.083  0.275  0.295 , highest: 3.56   3.758  4      7      21.3  
--------------------------------------------------------------------------------
tricuspid.valve.return.velocity 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     790     1218      102    0.997    2.991   0.6761    2.100    2.300 
     .25      .50      .75      .90      .95 
   2.600    2.900    3.348    3.737    4.005 

lowest : 0.9  1.1  1.2  1.3  1.36, highest: 4.9  5    5.1  5.26 5.76
--------------------------------------------------------------------------------
tricuspid.valve.return.pressure 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     182     1826       58    0.999    35.91    14.95    18.00    22.00 
     .25      .50      .75      .90      .95 
   27.25    34.00    43.00    53.90    58.95 

lowest : 1.7 2.1 3   7   14 , highest: 63  64  81  82  87 
--------------------------------------------------------------------------------
outcome.during.hospitalization 
       n  missing distinct 
    2008        0        3 
                                                                            
Value                      Alive                  Dead DischargeAgainstOrder
Frequency                   1890                    11                   107
Proportion                 0.941                 0.005                 0.053
--------------------------------------------------------------------------------
death.within.28.days 
       n  missing distinct     Info      Sum     Mean      Gmd 
    2008        0        2    0.054       37  0.01843  0.03619 

--------------------------------------------------------------------------------
re.admission.within.28.days 
       n  missing distinct     Info      Sum     Mean      Gmd 
    2008        0        2    0.195      140  0.06972   0.1298 

--------------------------------------------------------------------------------
death.within.3.months 
       n  missing distinct     Info      Sum     Mean      Gmd 
    2008        0        2    0.061       42  0.02092  0.04098 

--------------------------------------------------------------------------------
re.admission.within.3.months 
       n  missing distinct     Info      Sum     Mean      Gmd 
    2008        0        2     0.56      498    0.248   0.3732 

--------------------------------------------------------------------------------
death.within.6.months 
       n  missing distinct     Info      Sum     Mean      Gmd 
    2008        0        2    0.083       57  0.02839  0.05519 

--------------------------------------------------------------------------------
re.admission.within.6.months 
       n  missing distinct     Info      Sum     Mean      Gmd 
    2008        0        2     0.71      773    0.385   0.4738 

--------------------------------------------------------------------------------
time.of.death..days.from.admission. 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
      44     1964       23    0.987    29.52    47.31     1.00     2.00 
     .25      .50      .75      .90      .95 
    2.00     4.50    18.75    46.90   211.60 

lowest :   1   2   3   4   5, highest:  52  62 238 269 350
--------------------------------------------------------------------------------
re.admission.time..days.from.admission. 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     901     1107      311        1    126.7    124.9       16       22 
     .25      .50      .75      .90      .95 
      42       83      166      286      369 

lowest :    1    2    4    5    6, highest:  757  795  824  888 2280
--------------------------------------------------------------------------------
return.to.emergency.department.within.6.months 
       n  missing distinct     Info      Sum     Mean      Gmd 
    2007        1        2    0.711      775   0.3861   0.4743 

--------------------------------------------------------------------------------
time.to.emergency.department.within.6.months 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     897     1111      311        1    126.7    124.9     16.0     21.6 
     .25      .50      .75      .90      .95 
    42.0     83.0    166.0    286.0    369.0 

lowest :    1    2    4    5    6, highest:  757  795  824  888 2280
--------------------------------------------------------------------------------
creatinine.enzymatic.method 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1985       23     1117        1    108.9     67.4    47.00    53.24 
     .25      .50      .75      .90      .95 
   64.90    87.10   122.70   180.00   246.80 

lowest : 27.6  28.4  29.6  29.7  30.5 , highest: 636.8 653.6 689.8 897.1 963.4
--------------------------------------------------------------------------------
urea 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1985       23     1098        1    9.565    5.581    3.822    4.520 
     .25      .50      .75      .90      .95 
   5.900    8.030   11.540   16.578   21.106 

lowest : 1.58  1.71  1.73  1.86  1.87 , highest: 37.15 37.18 41.47 44.8  45.63
--------------------------------------------------------------------------------
uric.acid 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1985       23      626        1      483    186.5    255.0    294.0 
     .25      .50      .75      .90      .95 
   361.0    458.0    572.0    717.6    802.8 

lowest :   62  129  131  140  142, highest: 1116 1135 1225 1254 1409
--------------------------------------------------------------------------------
glomerular.filtration.rate 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1945       63     1754        1    68.66     40.1    17.40    25.88 
     .25      .50      .75      .90      .95 
   41.60    64.79    90.12   115.00   133.61 

lowest : 3.13   3.62   4.3    4.62   5     , highest: 235.59 245.52 257.67 259.51 281.01
--------------------------------------------------------------------------------
cystatin 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1967       41      372        1    1.843   0.9508    0.880    1.010 
     .25      .50      .75      .90      .95 
   1.210    1.550    2.200    3.094    3.710 

lowest : 0.23  0.36  0.54  0.57  0.58 , highest: 6.16  6.35  6.57  7.06  10.37
--------------------------------------------------------------------------------
white.blood.cell 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1981       27      878        1    7.308    3.484     3.36     4.07 
     .25      .50      .75      .90      .95 
    5.07     6.50     8.67    11.40    13.55 

lowest : 1.66  1.69  1.82  2.14  2.18 , highest: 26.04 26.31 28.14 31.46 32.79
--------------------------------------------------------------------------------
monocyte.ratio 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1981       27      151        1  0.06864  0.02856    0.031    0.039 
     .25      .50      .75      .90      .95 
   0.052    0.066    0.082    0.101    0.115 

lowest : 0.002 0.004 0.005 0.006 0.007, highest: 0.182 0.184 0.233 0.241 0.27 
--------------------------------------------------------------------------------
monocyte.count 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1981       27      143        1   0.4748   0.2498     0.19     0.24 
     .25      .50      .75      .90      .95 
    0.32     0.42     0.57     0.79     0.92 

lowest : 0.01 0.02 0.03 0.04 0.05, highest: 1.98 2    2.03 2.24 2.33
--------------------------------------------------------------------------------
red.blood.cell 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1981       27      374        1    3.856   0.8353     2.52     2.91 
     .25      .50      .75      .90      .95 
    3.43     3.89     4.31     4.73     5.01 

lowest : 0.89 0.9  0.91 1.02 1.05, highest: 6.62 6.72 6.78 7    7.02
--------------------------------------------------------------------------------
coefficient.of.variation.of.red.blood.cell.distribution.width 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1978       30      116    0.999    14.92    2.027     12.8     13.1 
     .25      .50      .75      .90      .95 
    13.6     14.4     15.6     17.4     19.0 

lowest : 11.8 11.9 12   12.1 12.2, highest: 24.7 25   25.5 27.3 29.9
--------------------------------------------------------------------------------
standard.deviation.of.red.blood.cell.distribution.width 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1978       30      306        1    48.84    6.521    40.79    42.70 
     .25      .50      .75      .90      .95 
   45.10    47.80    51.40    55.90    59.21 

lowest : 32.3 32.9 33.2 33.5 33.9, highest: 88.1 89.1 91.3 94.1 98  
--------------------------------------------------------------------------------
mean.corpuscular.volume 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1980       28      410        1    92.02    9.237    72.70    81.69 
     .25      .50      .75      .90      .95 
   88.80    93.30    96.90   100.90   103.80 

lowest : 57.8  58.7  59.9  60.4  61.2 , highest: 119.8 120.3 120.4 121.1 135.8
--------------------------------------------------------------------------------
hematocrit 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1980       28      341        1   0.3529  0.07599   0.2310   0.2590 
     .25      .50      .75      .90      .95 
  0.3150   0.3590   0.3980   0.4301   0.4530 

lowest : 0.094 0.095 0.097 0.099 0.104, highest: 0.585 0.586 0.591 0.595 0.605
--------------------------------------------------------------------------------
lymphocyte.count 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1981       27      261        1    1.032   0.6135     0.33     0.41 
     .25      .50      .75      .90      .95 
    0.62     0.93     1.29     1.73     2.06 

lowest : 0.08 0.1  0.14 0.15 0.16, highest: 4.76 5.36 5.73 6.57 6.69
--------------------------------------------------------------------------------
mean.hemoglobin.volume 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1980       28      193        1    29.93    3.587     22.2     25.3 
     .25      .50      .75      .90      .95 
    28.7     30.6     32.0     33.3     34.3 

lowest : 15.6 17.5 17.6 17.7 17.8, highest: 38.9 39   39.4 41   45.3
--------------------------------------------------------------------------------
mean.hemoglobin.concentration 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1981       27       95    0.999    324.8    15.02      300      308 
     .25      .50      .75      .90      .95 
     318      326      334      340      345 

lowest : 252 254 257 264 266, highest: 357 359 360 362 363
--------------------------------------------------------------------------------
mean.platelet.volume 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1905      103       92        1    12.11    1.928     9.50     9.90 
     .25      .50      .75      .90      .95 
   10.90    12.10    13.30    14.36    15.10 

lowest : 7.7  7.8  8    8.1  8.2 , highest: 16.8 16.9 17   17.1 17.6
--------------------------------------------------------------------------------
basophil.ratio 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1981       27       28    0.983 0.004592 0.003382    0.001    0.001 
     .25      .50      .75      .90      .95 
   0.002    0.004    0.006    0.008    0.011 

lowest : 0     0.001 0.002 0.003 0.004, highest: 0.024 0.027 0.028 0.043 0.063
--------------------------------------------------------------------------------
basophil.count 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1981       27       27    0.961  0.03114  0.02347     0.01     0.01 
     .25      .50      .75      .90      .95 
    0.02     0.03     0.04     0.06     0.07 

lowest : 0    0.01 0.02 0.03 0.04, highest: 0.22 0.23 0.24 0.28 0.73
--------------------------------------------------------------------------------
eosinophil.ratio 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1981       27      128    0.998  0.01851  0.02326    0.000    0.001 
     .25      .50      .75      .90      .95 
   0.003    0.009    0.022    0.041    0.065 

lowest : 0     0.001 0.002 0.003 0.004, highest: 0.23  0.274 0.276 0.3   0.62 
--------------------------------------------------------------------------------
eosinophil.count 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1981       27       97    0.995   0.1193   0.1523     0.00     0.00 
     .25      .50      .75      .90      .95 
    0.02     0.06     0.13     0.27     0.40 

lowest : 0    0.01 0.02 0.03 0.04, highest: 1.79 2.16 2.52 3.26 6.58
--------------------------------------------------------------------------------
hemoglobin 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1980       28      143        1    115.1    27.22       72       82 
     .25      .50      .75      .90      .95 
     101      117      131      144      151 

lowest :  29  30  31  32  34, highest: 181 184 191 193 200
--------------------------------------------------------------------------------
platelet 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1981       27      299        1    145.1    67.97       67       76 
     .25      .50      .75      .90      .95 
     101      135      177      223      261 

lowest :   5   6   7   9  11, highest: 514 522 582 615 646
--------------------------------------------------------------------------------
platelet.distribution.width 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1905      103      126    0.996    16.36    1.251     14.4     15.4 
     .25      .50      .75      .90      .95 
    16.0     16.3     16.6     17.0     18.8 

lowest : 9.5  10   10.3 10.4 10.5, highest: 24.3 24.4 24.7 24.8 25.1
--------------------------------------------------------------------------------
platelet.hematocrit 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1905      103      297        1   0.1737  0.07137   0.0840   0.1000 
     .25      .50      .75      .90      .95 
  0.1300   0.1650   0.2050   0.2546   0.2878 

lowest : 0.009 0.01  0.013 0.016 0.024, highest: 0.531 0.576 0.611 0.621 0.702
--------------------------------------------------------------------------------
neutrophil.ratio 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1981       27      437        1   0.7509   0.1173    0.568    0.613 
     .25      .50      .75      .90      .95 
   0.683    0.759    0.827    0.881    0.908 

lowest : 0.207 0.341 0.389 0.406 0.408, highest: 0.963 0.964 0.966 0.967 0.976
--------------------------------------------------------------------------------
neutrophil.count 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1981       27      831        1     5.65    3.208     2.22     2.71 
     .25      .50      .75      .90      .95 
    3.60     4.83     6.80     9.61    11.56 

lowest : 0.74  0.85  0.92  1.01  1.03 , highest: 23.71 24.61 24.66 29.02 29.94
--------------------------------------------------------------------------------
D.dimer 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1840      168      502        1    2.447    2.708    0.530    0.610 
     .25      .50      .75      .90      .95 
   0.790    1.210    2.170    4.601    7.470 

lowest : 0     0.28  0.34  0.35  0.36 , highest: 57.46 64.76 89.97 91.6  100.1
--------------------------------------------------------------------------------
international.normalized.ratio 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1973       35      190        1     1.35   0.3794    1.026    1.060 
     .25      .50      .75      .90      .95 
   1.130    1.210    1.350    1.630    2.050 

lowest : 0.83  0.85  0.87  0.88  0.92 , highest: 8.7   8.89  10.36 15.16 16.59
--------------------------------------------------------------------------------
activated.partial.thromboplastin.time 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1974       34      318        1    35.39    7.397    26.80    28.30 
     .25      .50      .75      .90      .95 
   30.83    34.10    38.20    42.80    47.00 

lowest : 20    20.3  20.7  21    21.6 , highest: 95    96.2  96.3  105.9 181.9
--------------------------------------------------------------------------------
thrombin.time 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1974       34      128    0.999    17.24    2.346     13.7     15.0 
     .25      .50      .75      .90      .95 
    16.3     17.2     17.9     18.8     19.3 

lowest : 9.7   9.8   10    10.4  10.6 , highest: 64.4  78.5  105.8 129.2 209.3
--------------------------------------------------------------------------------
prothrombin.activity 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1962       46      456        1     66.4    20.33    31.30    41.41 
     .25      .50      .75      .90      .95 
   56.60    68.50    78.40    87.72    93.00 

lowest : 3.8    4.2    5.6    6.5    7.3   , highest: 123.96 124.2  128.1  131.28 141.51
--------------------------------------------------------------------------------
prothrombin.time.ratio 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1973       35      193        1    1.353   0.3802    1.030    1.060 
     .25      .50      .75      .90      .95 
   1.130    1.220    1.360    1.640    2.064 

lowest : 0.85  0.87  0.89  0.92  0.93 , highest: 8.01  8.89  10.36 13.66 14.89
--------------------------------------------------------------------------------
fibrinogen 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1974       34      317        1    3.224    1.115    1.957    2.160 
     .25      .50      .75      .90      .95 
   2.493    3.040    3.700    4.610    5.310 

lowest : 0.51 0.61 0.69 0.87 1.02, highest: 7.94 8.1  8.21 8.31 8.32
--------------------------------------------------------------------------------
high.sensitivity.troponin 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1929       79      424        1   0.2815   0.4719   0.0040   0.0090 
     .25      .50      .75      .90      .95 
  0.0230   0.0550   0.1190   0.2950   0.6194 

lowest : 0      0.001  0.002  0.003  0.004 , highest: 26.718 26.94  31.719 34.206 45.675
--------------------------------------------------------------------------------
myoglobin 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     398     1610      392        1    123.3    116.1    27.56    33.65 
     .25      .50      .75      .90      .95 
   47.84    78.43   132.12   251.71   397.29 

lowest : 17.58  19.87  20.36  21.28  21.75 , highest: 753.23 804.76 812.37 990.17 994.82
--------------------------------------------------------------------------------
carbon.dioxide.binding.capacity 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1997       11      266        1    23.82    5.287     15.9     18.2 
     .25      .50      .75      .90      .95 
    20.9     23.9     26.7     29.4     31.2 

lowest : 2.1  2.7  3.1  4.9  5.6 , highest: 40.3 43.3 43.8 44.9 47.2
--------------------------------------------------------------------------------
calcium 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1997       11      116        1    2.293   0.1971     2.02     2.09 
     .25      .50      .75      .90      .95 
    2.18     2.29     2.40     2.51     2.59 

lowest : 1.39 1.44 1.52 1.59 1.67, highest: 2.88 2.95 2.98 3.02 3.37
--------------------------------------------------------------------------------
potassium 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1997       11      330        1    3.981   0.7509    3.028    3.200 
     .25      .50      .75      .90      .95 
   3.530    3.880    4.330    4.880    5.262 

lowest : 1.67 2.29 2.31 2.44 2.46, highest: 6.94 6.97 7.1  7.29 11.1
--------------------------------------------------------------------------------
chloride 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1997       11      290        1    101.8     6.53    90.98    94.00 
     .25      .50      .75      .90      .95 
   98.70   102.60   105.90   108.24   109.80 

lowest : 70.9  73.3  73.7  75.6  75.9 , highest: 115.9 118.2 120.9 122   125.1
--------------------------------------------------------------------------------
sodium 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1997       11      236        1    138.2    5.174    129.8    132.4 
     .25      .50      .75      .90      .95 
   136.0    139.0    141.4    143.4    144.4 

lowest : 107.5 107.9 111   114.4 115.7, highest: 150   154   155.1 158.5 159  
--------------------------------------------------------------------------------
Inorganic.Phosphorus 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     407     1601      122        1    1.129    0.382    0.713    0.790 
     .25      .50      .75      .90      .95 
   0.910    1.040    1.195    1.504    1.967 

lowest : 0.54 0.55 0.57 0.58 0.59, highest: 3.14 3.2  3.21 3.77 4.26
--------------------------------------------------------------------------------
serum.magnesium 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     407     1601       67    0.999   0.8709   0.1479    0.653    0.710 
     .25      .50      .75      .90      .95 
   0.785    0.870    0.950    1.040    1.100 

lowest : 0.52 0.53 0.54 0.56 0.57, highest: 1.21 1.26 1.31 1.32 1.49
--------------------------------------------------------------------------------
creatine.kinase.isoenzyme.to.creatine.kinase 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1767      241      173    0.999   0.2107   0.1504  0.05000  0.07238 
     .25      .50      .75      .90      .95 
 0.11000  0.17000  0.26000  0.40000  0.50000 

lowest : 0.0130435 0.02      0.0244864 0.0257143 0.0272467
highest: 0.91      0.93      0.98      0.99      1        
--------------------------------------------------------------------------------
hydroxybutyrate.dehydrogenase.to.lactate.dehydrogenase 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1767      241       62    0.999   0.8171   0.1012     0.66     0.71 
     .25      .50      .75      .90      .95 
    0.77     0.82     0.88     0.93     0.96 

lowest : 0.36 0.41 0.42 0.43 0.44, highest: 1    1.02 1.05 1.13 1.15
--------------------------------------------------------------------------------
hydroxybutyrate.dehydrogenase 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1767      241      316        1    215.5    92.16    129.0    140.0 
     .25      .50      .75      .90      .95 
   160.0    187.0    227.0    299.0    366.7 

lowest :   78   85   92   99  102, highest: 1605 1650 2192 2657 2666
--------------------------------------------------------------------------------
glutamic.oxaloacetic.transaminase 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1755      253      186    0.999    63.22    79.68     13.0     16.0 
     .25      .50      .75      .90      .95 
    20.0     26.0     39.0     68.0    116.3 

lowest :    5    6    7    8    9, highest: 3437 3454 4262 6490 6640
--------------------------------------------------------------------------------
creatine.kinase 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1767      241      330        1    136.4    121.6     38.0     45.0 
     .25      .50      .75      .90      .95 
    64.0     91.0    137.0    216.0    324.4 

lowest :   17   18   19   21   22, highest: 3010 3105 5609 5876 5920
--------------------------------------------------------------------------------
creatine.kinase.isoenzyme 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1767      241      406        1    19.76    12.82     7.10     9.00 
     .25      .50      .75      .90      .95 
   12.10    16.10    21.80    30.94    40.47 

lowest : 0.9   1     1.8   2.1   2.3  , highest: 187.7 205.9 217.1 338   424  
--------------------------------------------------------------------------------
lactate.dehydrogenase 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1767      241      377        1    273.3    132.5    157.0    168.0 
     .25      .50      .75      .90      .95 
   195.0    229.0    282.0    368.8    469.5 

lowest :  107  109  110  113  115, highest: 2616 2818 2881 4039 6279
--------------------------------------------------------------------------------
brain.natriuretic.peptide 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1973       35     1861        1     1280     1373    75.91   122.28 
     .25      .50      .75      .90      .95 
  303.94   753.03  1738.52  3385.09  5000.00 

lowest : 2.69    11.1    12.13   13.61   14.05  
highest: 4888.87 4916.26 4917.83 4982.85 5000   
--------------------------------------------------------------------------------
high.sensitivity.protein 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     941     1067      430        1    25.09    31.78      1.2      2.0 
     .25      .50      .75      .90      .95 
     4.0      9.4     29.8     80.6    104.0 

lowest : 0.1   0.2   0.3   0.4   0.5  , highest: 168.6 172   172.1 178.9 188.1
--------------------------------------------------------------------------------
nucleotidase 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1479      529      164        1    4.292    3.281     1.40     1.60 
     .25      .50      .75      .90      .95 
    2.20     3.00     5.00     8.42    11.41 

lowest : 0.3  0.6  0.7  0.8  0.9 , highest: 25.9 26.4 28.6 29.4 31.4
--------------------------------------------------------------------------------
fucosidase 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1479      529      281        1    19.37    6.694    10.60    12.40 
     .25      .50      .75      .90      .95 
   15.20    18.70    22.80    27.22    30.30 

lowest : 3.8  4.6  4.9  5    6.4 , highest: 39.4 39.5 42.8 44.4 59.2
--------------------------------------------------------------------------------
albumin 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1906      102      255        1    36.53    5.526     28.0     30.1 
     .25      .50      .75      .90      .95 
    33.5     36.8     39.8     42.6     44.1 

lowest : 12.6 15.5 17.3 17.6 19  , highest: 51   51.1 52.4 52.6 52.7
--------------------------------------------------------------------------------
white.globulin.ratio 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1906      102       25    0.989     1.33   0.3412      0.8      1.0 
     .25      .50      .75      .90      .95 
     1.1      1.3      1.5      1.7      1.8 

lowest : 0.1 0.3 0.4 0.5 0.6, highest: 2.2 2.3 2.4 2.5 2.6
--------------------------------------------------------------------------------
glutamyltranspeptidase 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1906      102      250        1    61.07    57.84     12.0     15.0 
     .25      .50      .75      .90      .95 
    22.0     39.0     74.0    124.0    186.8 

lowest :   5   6   7   8   9, highest: 574 586 623 712 721
--------------------------------------------------------------------------------
glutamic.pyruvic.transaminase 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1906      102      197    0.999    53.97    71.67     8.00    10.00 
     .25      .50      .75      .90      .95 
   13.00    21.00    35.75    73.00   114.50 

lowest :    3    4    5    6    7, highest: 2096 2147 2551 3085 4928
--------------------------------------------------------------------------------
glutamic.oxaliplatin 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     592     1416      193        1    1.372   0.7181    0.590    0.690 
     .25      .50      .75      .90      .95 
   0.910    1.200    1.655    2.149    2.602 

lowest : 0.01  0.29  0.3   0.31  0.41 , highest: 4.56  4.75  4.8   5.71  11.14
--------------------------------------------------------------------------------
indirect.bilirubin 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1906      102      351        1    13.71    8.832     4.40     5.40 
     .25      .50      .75      .90      .95 
    7.80    11.60    17.10    23.70    29.55 

lowest : 1     1.2   1.4   1.5   1.7  , highest: 69.5  76.8  87.9  90.6  128.6
--------------------------------------------------------------------------------
alkaline.phosphatase 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1906      102      194        1    89.48     39.3     48.0     53.0 
     .25      .50      .75      .90      .95 
    64.0     80.0    102.0    132.0    162.5 

lowest :   19   25   30   31   32, highest:  313  347  354  400 1085
--------------------------------------------------------------------------------
globulin 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1906      102      278        1    28.57    6.255    20.80    22.05 
     .25      .50      .75      .90      .95 
   24.70    27.90    31.40    35.45    38.65 

lowest : 13.7 15.5 16.2 16.4 16.6, highest: 65.2 66.2 67.4 70   88.3
--------------------------------------------------------------------------------
direct.bilirubin 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1906      102      298        1    9.025    7.662      2.1      2.7 
     .25      .50      .75      .90      .95 
     4.1      6.5     10.5     17.1     24.4 

lowest : 0.3   0.4   0.6   0.8   0.9  , highest: 72.9  76.2  77.5  109.2 179.8
--------------------------------------------------------------------------------
total.bilirubin 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1906      102      507        1    22.73    15.93     6.90     8.60 
     .25      .50      .75      .90      .95 
   12.12    18.30    27.80    40.40    53.35 

lowest : 1.6   1.8   1.9   2.8   2.9  , highest: 139.1 144.8 164.5 167.4 256.6
--------------------------------------------------------------------------------
total.bile.acid 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1498      510      279        1    8.554    9.351    1.285    1.670 
     .25      .50      .75      .90      .95 
   2.600    4.700    8.800   17.200   27.415 

lowest : 0     0.1   0.5   0.6   0.7  , highest: 110.8 122.6 134.1 166.2 170.2
--------------------------------------------------------------------------------
total.protein 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1906      102      349        1     65.1    8.154    54.00    56.40 
     .25      .50      .75      .90      .95 
   60.12    64.70    69.38    74.60    77.47 

lowest : 41.7  42.1  42.9  43.4  43.6 , highest: 91.8  94.7  97.3  99    100.9
--------------------------------------------------------------------------------
erythrocyte.sedimentation.rate 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     307     1701       89    0.999    35.51    32.79      2.0      4.0 
     .25      .50      .75      .90      .95 
    12.0     28.0     50.5     81.2    101.0 

lowest :   2   3   4   5   6, highest: 106 115 118 119 120
--------------------------------------------------------------------------------
cholesterol 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1810      198      451        1    3.723    1.199    2.180    2.410 
     .25      .50      .75      .90      .95 
   2.970    3.600    4.317    5.131    5.660 

lowest : 1.2  1.37 1.5  1.53 1.58, highest: 8.27 8.37 8.41 8.58 10  
--------------------------------------------------------------------------------
low.density.lipoprotein.cholesterol 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1810      198      327        1    1.861   0.8268    0.840    1.000 
     .25      .50      .75      .90      .95 
   1.320    1.760    2.288    2.800    3.270 

lowest : 0.35 0.38 0.43 0.47 0.48, highest: 4.99 5.11 5.17 5.47 6.41
--------------------------------------------------------------------------------
triglyceride 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1810      198      274        1    1.161   0.7054    0.490    0.559 
     .25      .50      .75      .90      .95 
   0.710    0.960    1.310    1.840    2.480 

lowest : 0.22  0.24  0.26  0.28  0.31 , highest: 6.79  10.76 14.24 22.46 24.45
--------------------------------------------------------------------------------
high.density.lipoprotein.cholesterol 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1810      198      188        1    1.103   0.3904    0.590    0.680 
     .25      .50      .75      .90      .95 
   0.860    1.070    1.300    1.550    1.745 

lowest : 0.02 0.05 0.17 0.23 0.26, highest: 2.39 2.43 2.55 2.59 2.74
--------------------------------------------------------------------------------
homocysteine 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     146     1862      140        1    21.41    11.41    10.27    11.98 
     .25      .50      .75      .90      .95 
   14.71    18.49    23.37    32.23    43.87 

lowest : 7.55   8.1    8.17   8.58   9.03  , highest: 53.28  67.7   75.63  77.15  120.19
--------------------------------------------------------------------------------
apolipoprotein.A 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     176     1832       96        1    1.303    0.371    0.780    0.895 
     .25      .50      .75      .90      .95 
   1.080    1.310    1.525    1.740    1.808 

lowest : 0.53 0.57 0.62 0.68 0.7 , highest: 1.93 1.98 2.01 2.06 2.2 
--------------------------------------------------------------------------------
apolipoprotein.B 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     176     1832       75    0.999   0.7635    0.301   0.4225   0.4500 
     .25      .50      .75      .90      .95 
  0.5700   0.7200   0.9225   1.0200   1.2850 

lowest : 0.3  0.37 0.38 0.39 0.4 , highest: 1.54 1.65 1.76 1.84 2.07
--------------------------------------------------------------------------------
lipoprotein 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     176     1832      148        1    231.8    230.4     29.0     37.0 
     .25      .50      .75      .90      .95 
    60.5    153.0    324.0    546.0    684.5 

lowest :    5   16   17   21   24, highest:  935  938  976 1005 1160
--------------------------------------------------------------------------------
pH 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     993     1015       53    0.997    7.413  0.07362     7.29     7.34 
     .25      .50      .75      .90      .95 
    7.39     7.42     7.45     7.49     7.51 

lowest : 6.77 6.81 7.05 7.06 7.07, highest: 7.55 7.56 7.57 7.58 7.6 
--------------------------------------------------------------------------------
standard.residual.base 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     993     1015      229        1   -2.387    5.687   -10.84    -8.20 
     .25      .50      .75      .90      .95 
   -5.10    -2.20     0.50     3.88     5.84 

lowest : -32.7 -32.6 -22.8 -22.4 -20.3, highest: 13.6  17.1  18.3  20.2  24.9 
--------------------------------------------------------------------------------
standard.bicarbonate 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     993     1015      178        1    22.99    4.142    17.02    18.70 
     .25      .50      .75      .90      .95 
   21.00    23.00    25.00    27.50    28.94 

lowest : 2.6  4    8.6  9.1  10.1, highest: 36   38.6 40.2 42   42.7
--------------------------------------------------------------------------------
partial.pressure.of.carbon.dioxide 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     993     1015       59    0.998     35.8    9.217       25       27 
     .25      .50      .75      .90      .95 
      30       34       39       45       50 

lowest :  12  16  18  19  20, highest:  77  80  96  97 143
--------------------------------------------------------------------------------
total.carbon.dioxide 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     993     1015      232        1    23.31     5.47    15.70    17.40 
     .25      .50      .75      .90      .95 
   20.40    23.20    25.90    29.38    31.50 

lowest : 2.1  2.8  7.9  8.1  8.3 , highest: 41.2 41.3 42.4 44.1 57.5
--------------------------------------------------------------------------------
methemoglobin 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     992     1016       16    0.939   0.2405   0.1724      0.0      0.1 
     .25      .50      .75      .90      .95 
     0.1      0.2      0.3      0.4      0.5 
                                                                            
Value        0.0   0.1   0.2   0.3   0.4   0.5   0.6   0.7   0.8   0.9   1.1
Frequency     87   222   224   333    59    30    10     9     7     3     1
Proportion 0.088 0.224 0.226 0.336 0.059 0.030 0.010 0.009 0.007 0.003 0.001
                                        
Value        1.3   1.4   1.7   1.9   2.3
Frequency      1     3     1     1     1
Proportion 0.001 0.003 0.001 0.001 0.001

For the frequency table, variable is rounded to the nearest 0
--------------------------------------------------------------------------------
hematocrit.blood.gas 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     992     1016       48    0.998    36.58    8.184    23.00    27.00 
     .25      .50      .75      .90      .95 
   32.00    37.00    41.00    45.00    47.45 

lowest :  10  12  13  14  15, highest:  54  55  56  60 111
--------------------------------------------------------------------------------
reduced.hemoglobin 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     992     1016      148        1    4.215    4.255    0.800    1.000 
     .25      .50      .75      .90      .95 
   1.500    2.500    4.625    8.000   12.435 

lowest : 0.3  0.4  0.5  0.6  0.7 , highest: 44.2 56   63.3 64.5 74.7
--------------------------------------------------------------------------------
potassium.ion 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     993     1015      266        1    3.893   0.7163    2.980    3.142 
     .25      .50      .75      .90      .95 
   3.470    3.810    4.200    4.728    5.126 

lowest : 1.5  2.28 2.32 2.37 2.44, highest: 6.54 6.6  6.62 6.91 7.13
--------------------------------------------------------------------------------
chloride.ion 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     993     1015       45    0.997    103.9    6.743       93       96 
     .25      .50      .75      .90      .95 
     101      105      108      111      112 

lowest :  73  77  78  80  81, highest: 117 118 119 120 121
--------------------------------------------------------------------------------
sodium.ion 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     993     1015      213        1    136.2    5.299    127.2    130.0 
     .25      .50      .75      .90      .95 
   133.6    137.1    139.6    141.4    142.5 

lowest : 109.3 113.1 114.3 114.4 115  , highest: 145.6 146   147.2 148.6 148.8
--------------------------------------------------------------------------------
glucose.blood.gas 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     993     1015      159        1     7.83    3.623     4.46     4.80 
     .25      .50      .75      .90      .95 
    5.60     6.70     8.70    12.48    15.30 

lowest : 0.2  1.9  2.2  2.4  2.8 , highest: 30.1 33.1 40.2 40.8 41  
--------------------------------------------------------------------------------
lactate 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     993     1015       77    0.998    2.308    1.341     1.10     1.20 
     .25      .50      .75      .90      .95 
    1.50     1.80     2.50     3.50     4.84 

lowest : 0.6  0.7  0.8  0.9  1   , highest: 14.1 14.7 16.9 17   19.9
--------------------------------------------------------------------------------
measured.residual.base 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     993     1015      218        1   -1.898    4.961    -9.34    -7.10 
     .25      .50      .75      .90      .95 
   -4.20    -1.70     0.60     3.40     5.00 

lowest : -31.6 -30.5 -21.4 -20.8 -18.6, highest: 12.4  14.7  16.4  18    18.4 
--------------------------------------------------------------------------------
measured.bicarbonate 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     993     1015      209        1    22.22    5.239    15.00    16.50 
     .25      .50      .75      .90      .95 
   19.40    22.20    24.80    28.08    30.04 

lowest : 1.8  2.3  7.1  7.5  8.3 , highest: 39   39.7 40.8 42.5 53.1
--------------------------------------------------------------------------------
carboxyhemoglobin 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     992     1016       35    0.979   0.6056   0.5434    0.100    0.100 
     .25      .50      .75      .90      .95 
   0.300    0.400    0.800    1.300    1.645 

lowest : 0   0.1 0.2 0.3 0.4, highest: 3   3.1 3.3 3.7 5.3
--------------------------------------------------------------------------------
body.temperature.blood.gas 
       n  missing distinct     Info     Mean      Gmd 
     993     1015        1        0       37        0 
              
Value       37
Frequency  993
Proportion   1
--------------------------------------------------------------------------------
oxygen.saturation 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     993     1015       38    0.962    95.81    4.303     87.6     92.0 
     .25      .50      .75      .90      .95 
    95.0     98.0     99.0     99.0     99.0 

lowest :  25  35  36  43  56, highest:  96  97  98  99 100
--------------------------------------------------------------------------------
partial.oxygen.pressure 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     993     1015      171        1    108.1    42.03       57       65 
     .25      .50      .75      .90      .95 
      80      102      134      158      176 

lowest :  20  27  28  33  35, highest: 224 226 242 243 255
--------------------------------------------------------------------------------
oxyhemoglobin 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     992     1016      155        1    94.94    4.419    86.70    90.71 
     .25      .50      .75      .90      .95 
   94.50    96.60    97.80    98.30    98.60 

lowest : 24.3 35.1 36.3 43   55.7, highest: 98.7 98.8 98.9 99   99.1
--------------------------------------------------------------------------------
anion.gap 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     993     1015      187        1    14.02    4.388      7.8      9.2 
     .25      .50      .75      .90      .95 
    11.7     13.8     16.1     18.6     20.2 

lowest : -1.2 -0.8 2.9  3.2  4.3 , highest: 33   34.3 34.7 34.8 43.7
--------------------------------------------------------------------------------
free.calcium 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     993     1015       38    0.996     1.11  0.05766    1.026    1.050 
     .25      .50      .75      .90      .95 
   1.080    1.110    1.140    1.170    1.190 

lowest : 0.89 0.92 0.94 0.95 0.96, highest: 1.26 1.27 1.3  1.38 1.39
--------------------------------------------------------------------------------
total.hemoglobin 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
     992     1016      131        1    124.1    27.38       79       91 
     .25      .50      .75      .90      .95 
     110      127      139      152      161 

lowest :  35  42  43  48  50, highest: 185 187 189 192 203
--------------------------------------------------------------------------------
GCS 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    2008        0       10    0.083    14.83   0.3316       15       15 
     .25      .50      .75      .90      .95 
      15       15       15       15       15 
                                                                      
Value          3     4     6     7    10    11    12    13    14    15
Frequency     13     1     1     4     7    19     3     2     7  1951
Proportion 0.006 0.000 0.000 0.002 0.003 0.009 0.001 0.001 0.003 0.972

For the frequency table, variable is rounded to the nearest 0
--------------------------------------------------------------------------------
dischargeDay 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    2008        0       54    0.992    9.421    6.174        3        4 
     .25      .50      .75      .90      .95 
       6        8       10       15       21 

lowest :   1   2   3   4   5, highest:  76  82  88 118 123
--------------------------------------------------------------------------------
ageCat 
       n  missing distinct 
    2008        0        8 
                                                                         
Value       (21,29]  (29,39]  (39,49]  (49,59]  (59,69]  (69,79]  (79,89]
Frequency         4       12       56      106      368      715      646
Proportion    0.002    0.006    0.028    0.053    0.183    0.356    0.322
                   
Value      (89,110]
Frequency       101
Proportion    0.050
--------------------------------------------------------------------------------

Variables with all observations missing:

[1] cholinesterase


2.3 Clean and transform dataframe

2.3.1 Clean column names

2.3.1.1 Column name before clean

# Keep the original column names so they can be compared with the cleaned ones
col_name_before <- names(zigong)

# Display the names through details::details() under the given summary label
details::details(
  col_name_before,
  summary = "Click to show results!"
)
Click to show results!

...1
inpatient.number
DestinationDischarge
admission.ward
admission.way
occupation
discharge.department
visit.times
gender
body.temperature
pulse
respiration
systolic.blood.pressure
diastolic.blood.pressure
map
weight
height
BMI
type.of.heart.failure
NYHA.cardiac.function.classification
Killip.grade
myocardial.infarction
congestive.heart.failure
peripheral.vascular.disease
cerebrovascular.disease
dementia
Chronic.obstructive.pulmonary.disease
connective.tissue.disease
peptic.ulcer.disease
diabetes
moderate.to.severe.chronic.kidney.disease
hemiplegia
leukemia
malignant.lymphoma
solid.tumor
liver.disease
AIDS
CCI.score
type.II.respiratory.failure
consciousness
eye.opening
verbal.response
movement
respiratory.support.
oxygen.inhalation
fio2
acute.renal.failure
LVEF
left.ventricular.end.diastolic.diameter.LV
mitral.valve.EMS
mitral.valve.AMS
EA
tricuspid.valve.return.velocity
tricuspid.valve.return.pressure
outcome.during.hospitalization
death.within.28.days
re.admission.within.28.days
death.within.3.months
re.admission.within.3.months
death.within.6.months
re.admission.within.6.months
time.of.death..days.from.admission.
re.admission.time..days.from.admission.
return.to.emergency.department.within.6.months
time.to.emergency.department.within.6.months
creatinine.enzymatic.method
urea
uric.acid
glomerular.filtration.rate
cystatin
white.blood.cell
monocyte.ratio
monocyte.count
red.blood.cell
coefficient.of.variation.of.red.blood.cell.distribution.width
standard.deviation.of.red.blood.cell.distribution.width
mean.corpuscular.volume
hematocrit
lymphocyte.count
mean.hemoglobin.volume
mean.hemoglobin.concentration
mean.platelet.volume
basophil.ratio
basophil.count
eosinophil.ratio
eosinophil.count
hemoglobin
platelet
platelet.distribution.width
platelet.hematocrit
neutrophil.ratio
neutrophil.count
D.dimer
international.normalized.ratio
activated.partial.thromboplastin.time
thrombin.time
prothrombin.activity
prothrombin.time.ratio
fibrinogen
high.sensitivity.troponin
myoglobin
carbon.dioxide.binding.capacity
calcium
potassium
chloride
sodium
Inorganic.Phosphorus
serum.magnesium
creatine.kinase.isoenzyme.to.creatine.kinase
hydroxybutyrate.dehydrogenase.to.lactate.dehydrogenase
hydroxybutyrate.dehydrogenase
glutamic.oxaloacetic.transaminase
creatine.kinase
creatine.kinase.isoenzyme
lactate.dehydrogenase
brain.natriuretic.peptide
high.sensitivity.protein
nucleotidase
fucosidase
albumin
white.globulin.ratio
cholinesterase
glutamyltranspeptidase
glutamic.pyruvic.transaminase
glutamic.oxaliplatin
indirect.bilirubin
alkaline.phosphatase
globulin
direct.bilirubin
total.bilirubin
total.bile.acid
total.protein
erythrocyte.sedimentation.rate
cholesterol
low.density.lipoprotein.cholesterol
triglyceride
high.density.lipoprotein.cholesterol
homocysteine
apolipoprotein.A
apolipoprotein.B
lipoprotein
pH
standard.residual.base
standard.bicarbonate
partial.pressure.of.carbon.dioxide
total.carbon.dioxide
methemoglobin
hematocrit.blood.gas
reduced.hemoglobin
potassium.ion
chloride.ion
sodium.ion
glucose.blood.gas
lactate
measured.residual.base
measured.bicarbonate
carboxyhemoglobin
body.temperature.blood.gas
oxygen.saturation
partial.oxygen.pressure
oxyhemoglobin
anion.gap
free.calcium
total.hemoglobin
GCS
dischargeDay
ageCat


2.3.1.2 Column name after clean

  • Copy data frame to data_clean: in this case zigong to zigong_clean
# Work on a tibble copy named zigong_clean; `zigong` itself is not reassigned
zigong_clean <- as_tibble(zigong)
  • Clean column names
# Clean the column names of the tibble copy made in the previous step.
# Piping from `zigong` again would overwrite and discard that copy.
zigong_clean <- zigong_clean %>%
  clean_names()

# Record the column names as they are after clean_names()
col_name_after <- names(zigong_clean)

# Display the names through details::details() under the given summary label
details::details(
  col_name_after,
  summary = "Click to show results!"
)
Click to show results!

x1
inpatient_number
destination_discharge
admission_ward
admission_way
occupation
discharge_department
visit_times
gender
body_temperature
pulse
respiration
systolic_blood_pressure
diastolic_blood_pressure
map
weight
height
bmi
type_of_heart_failure
nyha_cardiac_function_classification
killip_grade
myocardial_infarction
congestive_heart_failure
peripheral_vascular_disease
cerebrovascular_disease
dementia
chronic_obstructive_pulmonary_disease
connective_tissue_disease
peptic_ulcer_disease
diabetes
moderate_to_severe_chronic_kidney_disease
hemiplegia
leukemia
malignant_lymphoma
solid_tumor
liver_disease
aids
cci_score
type_ii_respiratory_failure
consciousness
eye_opening
verbal_response
movement
respiratory_support
oxygen_inhalation
fio2
acute_renal_failure
lvef
left_ventricular_end_diastolic_diameter_lv
mitral_valve_ems
mitral_valve_ams
ea
tricuspid_valve_return_velocity
tricuspid_valve_return_pressure
outcome_during_hospitalization
death_within_28_days
re_admission_within_28_days
death_within_3_months
re_admission_within_3_months
death_within_6_months
re_admission_within_6_months
time_of_death_days_from_admission
re_admission_time_days_from_admission
return_to_emergency_department_within_6_months
time_to_emergency_department_within_6_months
creatinine_enzymatic_method
urea
uric_acid
glomerular_filtration_rate
cystatin
white_blood_cell
monocyte_ratio
monocyte_count
red_blood_cell
coefficient_of_variation_of_red_blood_cell_distribution_width
standard_deviation_of_red_blood_cell_distribution_width
mean_corpuscular_volume
hematocrit
lymphocyte_count
mean_hemoglobin_volume
mean_hemoglobin_concentration
mean_platelet_volume
basophil_ratio
basophil_count
eosinophil_ratio
eosinophil_count
hemoglobin
platelet
platelet_distribution_width
platelet_hematocrit
neutrophil_ratio
neutrophil_count
d_dimer
international_normalized_ratio
activated_partial_thromboplastin_time
thrombin_time
prothrombin_activity
prothrombin_time_ratio
fibrinogen
high_sensitivity_troponin
myoglobin
carbon_dioxide_binding_capacity
calcium
potassium
chloride
sodium
inorganic_phosphorus
serum_magnesium
creatine_kinase_isoenzyme_to_creatine_kinase
hydroxybutyrate_dehydrogenase_to_lactate_dehydrogenase
hydroxybutyrate_dehydrogenase
glutamic_oxaloacetic_transaminase
creatine_kinase
creatine_kinase_isoenzyme
lactate_dehydrogenase
brain_natriuretic_peptide
high_sensitivity_protein
nucleotidase
fucosidase
albumin
white_globulin_ratio
cholinesterase
glutamyltranspeptidase
glutamic_pyruvic_transaminase
glutamic_oxaliplatin
indirect_bilirubin
alkaline_phosphatase
globulin
direct_bilirubin
total_bilirubin
total_bile_acid
total_protein
erythrocyte_sedimentation_rate
cholesterol
low_density_lipoprotein_cholesterol
triglyceride
high_density_lipoprotein_cholesterol
homocysteine
apolipoprotein_a
apolipoprotein_b
lipoprotein
p_h
standard_residual_base
standard_bicarbonate
partial_pressure_of_carbon_dioxide
total_carbon_dioxide
methemoglobin
hematocrit_blood_gas
reduced_hemoglobin
potassium_ion
chloride_ion
sodium_ion
glucose_blood_gas
lactate
measured_residual_base
measured_bicarbonate
carboxyhemoglobin
body_temperature_blood_gas
oxygen_saturation
partial_oxygen_pressure
oxyhemoglobin
anion_gap
free_calcium
total_hemoglobin
gcs
discharge_day
age_cat


2.3.2 Converting categorical columns to factors

2.3.2.1 Review data frame before converting

# Preview the first rows before any column is converted to a factor
zigong_clean %>%
  head()

2.3.2.2 Convert and transform data

# Convert the named columns of a data frame to factors.
#
# Args:
#   df:   A data frame (or tibble).
#   cols: Character vector of column names to convert.
#
# Returns:
#   `df` in which every column listed in `cols` that exists has been replaced
#   by `as.factor()` of itself. One status line is printed per requested
#   column, in the order the names are given.
convert_to_factor <- function(df, cols) {
  for (col in cols) {
    if (col %in% names(df)) {
      df[[col]] <- as.factor(df[[col]])
      print(paste("Column", col, "is converted to factor"))
    } else {
      # A name that is not in the data frame is reported and skipped
      print(paste("Column", col, "does not exist in the data frame"))
    }
  }
  df
}

# Names of the columns to turn into factors
# NOTE(review): oxygen_inhalation is still character after this step; confirm
# whether it should be listed here as well.
categorial_columns <- c(
  "destination_discharge",
  "admission_ward",
  "admission_way",
  "occupation",
  "discharge_department",
  "gender",
  "type_of_heart_failure",
  "nyha_cardiac_function_classification",
  "killip_grade",
  "type_ii_respiratory_failure",
  "consciousness",
  "respiratory_support",
  "outcome_during_hospitalization"
)

# Replace each listed column with its factor version
zigong_clean <- convert_to_factor(df = zigong_clean, cols = categorial_columns)
## [1] "Column destination_discharge is converted to factor"
## [1] "Column admission_ward is converted to factor"
## [1] "Column admission_way is converted to factor"
## [1] "Column occupation is converted to factor"
## [1] "Column discharge_department is converted to factor"
## [1] "Column gender is converted to factor"
## [1] "Column type_of_heart_failure is converted to factor"
## [1] "Column nyha_cardiac_function_classification is converted to factor"
## [1] "Column killip_grade is converted to factor"
## [1] "Column type_ii_respiratory_failure is converted to factor"
## [1] "Column consciousness is converted to factor"
## [1] "Column respiratory_support is converted to factor"
## [1] "Column outcome_during_hospitalization is converted to factor"

2.3.3 Review data frame after converting

# Inspect the column types to confirm which columns are now factors
zigong_clean %>%
  str()
## spc_tbl_ [2,008 × 167] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ x1                                                           : num [1:2008] 1 2 3 4 5 6 7 8 9 10 ...
##  $ inpatient_number                                             : num [1:2008] 857781 743087 866418 775928 810128 ...
##  $ destination_discharge                                        : Factor w/ 4 levels "Died","HealthcareFacility",..: 3 3 3 3 3 3 3 2 3 3 ...
##  $ admission_ward                                               : Factor w/ 4 levels "Cardiology","GeneralWard",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ admission_way                                                : Factor w/ 2 levels "Emergency","NonEmergency": 2 2 2 1 2 2 2 1 2 2 ...
##  $ occupation                                                   : Factor w/ 5 levels "farmer","Officer",..: 4 4 1 4 4 4 4 4 4 4 ...
##  $ discharge_department                                         : Factor w/ 4 levels "Cardiology","GeneralWard",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ visit_times                                                  : num [1:2008] 1 1 2 1 1 1 2 1 1 1 ...
##  $ gender                                                       : Factor w/ 2 levels "Female","Male": 2 1 2 2 1 1 2 1 1 1 ...
##  $ body_temperature                                             : num [1:2008] 36.7 36.8 36.5 36 35 36 36 36 36.2 36.5 ...
##  $ pulse                                                        : num [1:2008] 87 95 98 73 88 110 52 94 85 65 ...
##  $ respiration                                                  : num [1:2008] 19 18 18 19 19 18 25 18 24 18 ...
##  $ systolic_blood_pressure                                      : num [1:2008] 102 150 102 110 134 96 129 112 166 130 ...
##  $ diastolic_blood_pressure                                     : num [1:2008] 64 70 67 74 62 64 54 60 85 80 ...
##  $ map                                                          : num [1:2008] 76.7 96.7 78.7 86 86 ...
##  $ weight                                                       : num [1:2008] 50 51 70 65 76 45 60 50 60 56 ...
##  $ height                                                       : num [1:2008] 1.64 1.63 1.7 1.7 1.55 1.56 1.65 1.55 1.55 1.56 ...
##  $ bmi                                                          : num [1:2008] 18.6 19.2 24.2 22.5 31.6 ...
##  $ type_of_heart_failure                                        : Factor w/ 3 levels "Both","Left",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ nyha_cardiac_function_classification                         : Factor w/ 3 levels "II","III","IV": 2 2 1 2 2 2 2 2 1 2 ...
##  $ killip_grade                                                 : Factor w/ 4 levels "I","II","III",..: 3 1 2 2 2 1 1 2 3 1 ...
##  $ myocardial_infarction                                        : num [1:2008] 0 0 0 0 0 0 0 0 0 1 ...
##  $ congestive_heart_failure                                     : num [1:2008] 0 0 0 0 0 0 0 0 0 0 ...
##  $ peripheral_vascular_disease                                  : num [1:2008] 0 0 0 0 0 0 0 0 0 0 ...
##  $ cerebrovascular_disease                                      : num [1:2008] 0 0 0 0 0 0 0 0 1 0 ...
##  $ dementia                                                     : num [1:2008] 0 0 0 0 0 0 0 0 0 0 ...
##  $ chronic_obstructive_pulmonary_disease                        : num [1:2008] 1 0 0 1 0 0 1 0 0 0 ...
##  $ connective_tissue_disease                                    : num [1:2008] 0 0 0 0 0 0 0 0 0 0 ...
##  $ peptic_ulcer_disease                                         : num [1:2008] 0 0 0 0 0 0 0 0 0 0 ...
##  $ diabetes                                                     : num [1:2008] 1 0 0 0 0 0 0 0 0 0 ...
##  $ moderate_to_severe_chronic_kidney_disease                    : num [1:2008] 0 0 0 1 0 0 0 0 0 0 ...
##  $ hemiplegia                                                   : num [1:2008] 0 0 0 0 0 0 0 0 0 0 ...
##  $ leukemia                                                     : num [1:2008] 0 0 0 0 0 0 0 0 0 0 ...
##  $ malignant_lymphoma                                           : num [1:2008] 0 0 0 0 0 0 0 0 0 0 ...
##  $ solid_tumor                                                  : num [1:2008] 0 0 0 0 0 0 0 0 0 0 ...
##  $ liver_disease                                                : num [1:2008] 0 0 0 0 0 0 0 0 0 0 ...
##  $ aids                                                         : num [1:2008] 0 0 0 0 0 0 0 0 0 0 ...
##  $ cci_score                                                    : num [1:2008] 2 0 0 2 0 0 1 0 1 1 ...
##  $ type_ii_respiratory_failure                                  : Factor w/ 2 levels "NonTypeII","TypeII": 1 1 1 1 1 1 1 1 1 1 ...
##  $ consciousness                                                : Factor w/ 4 levels "Clear","Nonresponsive",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ eye_opening                                                  : num [1:2008] 4 4 4 4 4 4 4 4 4 4 ...
##  $ verbal_response                                              : num [1:2008] 5 5 5 5 5 5 5 5 5 5 ...
##  $ movement                                                     : num [1:2008] 6 6 6 6 6 6 6 6 6 6 ...
##  $ respiratory_support                                          : Factor w/ 3 levels "IMV","NIMV","None": 3 3 3 3 3 3 3 3 3 3 ...
##  $ oxygen_inhalation                                            : chr [1:2008] "OxygenTherapy" "OxygenTherapy" "OxygenTherapy" "OxygenTherapy" ...
##  $ fio2                                                         : num [1:2008] 33 33 33 33 33 33 33 33 33 33 ...
##  $ acute_renal_failure                                          : num [1:2008] 0 0 0 0 0 0 0 0 0 0 ...
##  $ lvef                                                         : num [1:2008] NA NA NA NA NA NA NA NA NA NA ...
##  $ left_ventricular_end_diastolic_diameter_lv                   : num [1:2008] NA 40 46 NA 64 NA NA NA NA 42 ...
##  $ mitral_valve_ems                                             : num [1:2008] NA 1.16 0.84 NA 1.2 NA NA NA NA 1.28 ...
##  $ mitral_valve_ams                                             : num [1:2008] NA 1.52 0.12 NA 0.9 NA NA NA NA 0.06 ...
##  $ ea                                                           : num [1:2008] NA NA 7 NA 1.33 NA NA NA NA 21.3 ...
##  $ tricuspid_valve_return_velocity                              : num [1:2008] NA 3.34 2.8 NA NA NA NA NA NA 2.7 ...
##  $ tricuspid_valve_return_pressure                              : num [1:2008] NA 47 32 NA NA NA NA NA NA 30 ...
##  $ outcome_during_hospitalization                               : Factor w/ 3 levels "Alive","Dead",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ death_within_28_days                                         : num [1:2008] 0 0 0 0 0 0 0 0 0 0 ...
##  $ re_admission_within_28_days                                  : num [1:2008] 0 0 0 1 0 1 0 0 0 0 ...
##  $ death_within_3_months                                        : num [1:2008] 0 0 0 0 0 0 0 0 0 0 ...
##  $ re_admission_within_3_months                                 : num [1:2008] 0 0 0 1 0 1 0 1 0 0 ...
##  $ death_within_6_months                                        : num [1:2008] 0 0 0 0 0 0 0 0 0 0 ...
##  $ re_admission_within_6_months                                 : num [1:2008] 0 0 0 1 0 1 0 1 0 0 ...
##  $ time_of_death_days_from_admission                            : num [1:2008] NA NA NA NA NA NA NA NA NA NA ...
##  $ re_admission_time_days_from_admission                        : num [1:2008] NA NA NA 19 NA 15 NA 58 NA NA ...
##  $ return_to_emergency_department_within_6_months               : num [1:2008] 0 0 0 1 0 1 0 1 0 1 ...
##  $ time_to_emergency_department_within_6_months                 : num [1:2008] NA NA NA 19 NA 15 NA 58 NA NA ...
##  $ creatinine_enzymatic_method                                  : num [1:2008] 108.3 62 185.1 104.8 83.9 ...
##  $ urea                                                         : num [1:2008] 12.55 4.29 15.99 8.16 6.86 ...
##  $ uric_acid                                                    : num [1:2008] 685 170 567 635 432 717 392 544 454 338 ...
##  $ glomerular_filtration_rate                                   : num [1:2008] 58.6 85.4 31.5 58 60.5 ...
##  $ cystatin                                                     : num [1:2008] 1.32 1.25 2.43 2.32 1.35 4.78 4.28 1.66 2.07 1.13 ...
##  $ white_blood_cell                                             : num [1:2008] 9.43 5.32 13.01 2.18 6.14 ...
##  $ monocyte_ratio                                               : num [1:2008] 0.085 0.066 0.051 0.087 0.086 0.066 0.075 0.082 0.052 0.06 ...
##  $ monocyte_count                                               : num [1:2008] 0.8 0.35 0.67 0.19 0.53 0.71 0.55 0.6 0.22 0.4 ...
##  $ red_blood_cell                                               : num [1:2008] 4.09 3.93 4.43 2.71 4.2 3.44 3.35 3.92 4.11 4.07 ...
##  $ coefficient_of_variation_of_red_blood_cell_distribution_width: num [1:2008] 15 13 12.9 18.8 14.7 14.6 14.1 14.3 14.5 13.8 ...
##  $ standard_deviation_of_red_blood_cell_distribution_width      : num [1:2008] 50.3 40.7 44.5 69 47.7 44.3 55.6 52.5 46.6 46.6 ...
##  $ mean_corpuscular_volume                                      : num [1:2008] 94.5 88.6 96.4 102.2 90 ...
##  $ hematocrit                                                   : num [1:2008] 0.387 0.349 0.427 0.277 0.378 0.293 0.378 0.399 0.368 0.377 ...
##  $ lymphocyte_count                                             : num [1:2008] 1.51 1.18 0.75 0.71 1.27 0.6 1.28 1.35 0.24 0.95 ...
##  $ mean_hemoglobin_volume                                       : num [1:2008] 32 28.9 32.6 34.7 28.6 29.1 35 33.8 29.3 30.5 ...
##  $ mean_hemoglobin_concentration                                : num [1:2008] 338 326 339 339 317 342 310 332 326 329 ...
##  $ mean_platelet_volume                                         : num [1:2008] 14 11.3 12 12.4 12.6 10 12.7 11.9 13.1 14.2 ...
##  $ basophil_ratio                                               : num [1:2008] 0.006 0.002 0.002 0 0.002 0.003 0.002 0.003 0.004 0.001 ...
##  $ basophil_count                                               : num [1:2008] 0.05 0.01 0.03 0 0.01 0.03 0.02 0.02 0.02 0.01 ...
##  $ eosinophil_ratio                                             : num [1:2008] 0.002 0.009 0.002 0.028 0.008 0.003 0.034 0.012 0.008 0.002 ...
##  $ eosinophil_count                                             : num [1:2008] 0.02 0.05 0.02 0.06 0.05 0.03 0.25 0.09 0.03 0.02 ...
##  $ hemoglobin                                                   : num [1:2008] 131 114 144 94 120 100 117 132 120 124 ...
##  $ platelet                                                     : num [1:2008] 102 123 169 35 104 189 83 116 117 144 ...
##  $ platelet_distribution_width                                  : num [1:2008] 16.6 16.1 16.5 19.4 16.4 16.2 17.2 16.3 16.8 16.6 ...
##  $ platelet_hematocrit                                          : num [1:2008] 0.142 0.139 0.203 0.032 0.131 0.188 0.106 0.138 0.154 0.205 ...
##  $ neutrophil_ratio                                             : num [1:2008] 0.746 0.702 0.887 0.559 0.697 0.873 0.716 0.719 0.88 0.793 ...
##  $ neutrophil_count                                             : num [1:2008] 7.05 3.73 11.54 1.22 4.28 ...
##  $ d_dimer                                                      : num [1:2008] 1.19 1.06 0.83 1.39 0.57 1.46 5.27 0.66 0.76 0.68 ...
##  $ international_normalized_ratio                               : num [1:2008] 1.39 1.16 1.1 1.37 1.19 1.17 1.25 1.29 1.05 1.23 ...
##  $ activated_partial_thromboplastin_time                        : num [1:2008] 33.4 35.9 36.2 38.3 34.6 36 28.6 34.1 33.4 27.4 ...
##  $ thrombin_time                                                : num [1:2008] 17 18.4 14.9 18.3 16.4 14.6 17.6 17.1 14.8 17.4 ...
##  $ prothrombin_activity                                         : num [1:2008] 60.3 65.1 85.7 60.9 74.9 76.6 69.3 55.7 91.7 70.8 ...
##  $ prothrombin_time_ratio                                       : num [1:2008] 1.37 1.18 1.09 1.36 1.19 1.17 1.25 1.33 1.05 1.23 ...
##  $ fibrinogen                                                   : num [1:2008] 3.84 2.62 5.74 2.63 3.5 5.31 3.16 2.51 2.66 2.48 ...
##   [list output truncated]
##  - attr(*, "spec")=
##   .. cols(
##   ..   ...1 = col_double(),
##   ..   inpatient.number = col_double(),
##   ..   DestinationDischarge = col_character(),
##   ..   admission.ward = col_character(),
##   ..   admission.way = col_character(),
##   ..   occupation = col_character(),
##   ..   discharge.department = col_character(),
##   ..   visit.times = col_double(),
##   ..   gender = col_character(),
##   ..   body.temperature = col_double(),
##   ..   pulse = col_double(),
##   ..   respiration = col_double(),
##   ..   systolic.blood.pressure = col_double(),
##   ..   diastolic.blood.pressure = col_double(),
##   ..   map = col_double(),
##   ..   weight = col_double(),
##   ..   height = col_double(),
##   ..   BMI = col_double(),
##   ..   type.of.heart.failure = col_character(),
##   ..   NYHA.cardiac.function.classification = col_character(),
##   ..   Killip.grade = col_character(),
##   ..   myocardial.infarction = col_double(),
##   ..   congestive.heart.failure = col_double(),
##   ..   peripheral.vascular.disease = col_double(),
##   ..   cerebrovascular.disease = col_double(),
##   ..   dementia = col_double(),
##   ..   Chronic.obstructive.pulmonary.disease = col_double(),
##   ..   connective.tissue.disease = col_double(),
##   ..   peptic.ulcer.disease = col_double(),
##   ..   diabetes = col_double(),
##   ..   moderate.to.severe.chronic.kidney.disease = col_double(),
##   ..   hemiplegia = col_double(),
##   ..   leukemia = col_double(),
##   ..   malignant.lymphoma = col_double(),
##   ..   solid.tumor = col_double(),
##   ..   liver.disease = col_double(),
##   ..   AIDS = col_double(),
##   ..   CCI.score = col_double(),
##   ..   type.II.respiratory.failure = col_character(),
##   ..   consciousness = col_character(),
##   ..   eye.opening = col_double(),
##   ..   verbal.response = col_double(),
##   ..   movement = col_double(),
##   ..   respiratory.support. = col_character(),
##   ..   oxygen.inhalation = col_character(),
##   ..   fio2 = col_double(),
##   ..   acute.renal.failure = col_double(),
##   ..   LVEF = col_double(),
##   ..   left.ventricular.end.diastolic.diameter.LV = col_double(),
##   ..   mitral.valve.EMS = col_double(),
##   ..   mitral.valve.AMS = col_double(),
##   ..   EA = col_double(),
##   ..   tricuspid.valve.return.velocity = col_double(),
##   ..   tricuspid.valve.return.pressure = col_double(),
##   ..   outcome.during.hospitalization = col_character(),
##   ..   death.within.28.days = col_double(),
##   ..   re.admission.within.28.days = col_double(),
##   ..   death.within.3.months = col_double(),
##   ..   re.admission.within.3.months = col_double(),
##   ..   death.within.6.months = col_double(),
##   ..   re.admission.within.6.months = col_double(),
##   ..   time.of.death..days.from.admission. = col_double(),
##   ..   re.admission.time..days.from.admission. = col_double(),
##   ..   return.to.emergency.department.within.6.months = col_double(),
##   ..   time.to.emergency.department.within.6.months = col_double(),
##   ..   creatinine.enzymatic.method = col_double(),
##   ..   urea = col_double(),
##   ..   uric.acid = col_double(),
##   ..   glomerular.filtration.rate = col_double(),
##   ..   cystatin = col_double(),
##   ..   white.blood.cell = col_double(),
##   ..   monocyte.ratio = col_double(),
##   ..   monocyte.count = col_double(),
##   ..   red.blood.cell = col_double(),
##   ..   coefficient.of.variation.of.red.blood.cell.distribution.width = col_double(),
##   ..   standard.deviation.of.red.blood.cell.distribution.width = col_double(),
##   ..   mean.corpuscular.volume = col_double(),
##   ..   hematocrit = col_double(),
##   ..   lymphocyte.count = col_double(),
##   ..   mean.hemoglobin.volume = col_double(),
##   ..   mean.hemoglobin.concentration = col_double(),
##   ..   mean.platelet.volume = col_double(),
##   ..   basophil.ratio = col_double(),
##   ..   basophil.count = col_double(),
##   ..   eosinophil.ratio = col_double(),
##   ..   eosinophil.count = col_double(),
##   ..   hemoglobin = col_double(),
##   ..   platelet = col_double(),
##   ..   platelet.distribution.width = col_double(),
##   ..   platelet.hematocrit = col_double(),
##   ..   neutrophil.ratio = col_double(),
##   ..   neutrophil.count = col_double(),
##   ..   D.dimer = col_double(),
##   ..   international.normalized.ratio = col_double(),
##   ..   activated.partial.thromboplastin.time = col_double(),
##   ..   thrombin.time = col_double(),
##   ..   prothrombin.activity = col_double(),
##   ..   prothrombin.time.ratio = col_double(),
##   ..   fibrinogen = col_double(),
##   ..   high.sensitivity.troponin = col_double(),
##   ..   myoglobin = col_double(),
##   ..   carbon.dioxide.binding.capacity = col_double(),
##   ..   calcium = col_double(),
##   ..   potassium = col_double(),
##   ..   chloride = col_double(),
##   ..   sodium = col_double(),
##   ..   Inorganic.Phosphorus = col_double(),
##   ..   serum.magnesium = col_double(),
##   ..   creatine.kinase.isoenzyme.to.creatine.kinase = col_double(),
##   ..   hydroxybutyrate.dehydrogenase.to.lactate.dehydrogenase = col_double(),
##   ..   hydroxybutyrate.dehydrogenase = col_double(),
##   ..   glutamic.oxaloacetic.transaminase = col_double(),
##   ..   creatine.kinase = col_double(),
##   ..   creatine.kinase.isoenzyme = col_double(),
##   ..   lactate.dehydrogenase = col_double(),
##   ..   brain.natriuretic.peptide = col_double(),
##   ..   high.sensitivity.protein = col_double(),
##   ..   nucleotidase = col_double(),
##   ..   fucosidase = col_double(),
##   ..   albumin = col_double(),
##   ..   white.globulin.ratio = col_double(),
##   ..   cholinesterase = col_logical(),
##   ..   glutamyltranspeptidase = col_double(),
##   ..   glutamic.pyruvic.transaminase = col_double(),
##   ..   glutamic.oxaliplatin = col_double(),
##   ..   indirect.bilirubin = col_double(),
##   ..   alkaline.phosphatase = col_double(),
##   ..   globulin = col_double(),
##   ..   direct.bilirubin = col_double(),
##   ..   total.bilirubin = col_double(),
##   ..   total.bile.acid = col_double(),
##   ..   total.protein = col_double(),
##   ..   erythrocyte.sedimentation.rate = col_double(),
##   ..   cholesterol = col_double(),
##   ..   low.density.lipoprotein.cholesterol = col_double(),
##   ..   triglyceride = col_double(),
##   ..   high.density.lipoprotein.cholesterol = col_double(),
##   ..   homocysteine = col_double(),
##   ..   apolipoprotein.A = col_double(),
##   ..   apolipoprotein.B = col_double(),
##   ..   lipoprotein = col_double(),
##   ..   pH = col_double(),
##   ..   standard.residual.base = col_double(),
##   ..   standard.bicarbonate = col_double(),
##   ..   partial.pressure.of.carbon.dioxide = col_double(),
##   ..   total.carbon.dioxide = col_double(),
##   ..   methemoglobin = col_double(),
##   ..   hematocrit.blood.gas = col_double(),
##   ..   reduced.hemoglobin = col_double(),
##   ..   potassium.ion = col_double(),
##   ..   chloride.ion = col_double(),
##   ..   sodium.ion = col_double(),
##   ..   glucose.blood.gas = col_double(),
##   ..   lactate = col_double(),
##   ..   measured.residual.base = col_double(),
##   ..   measured.bicarbonate = col_double(),
##   ..   carboxyhemoglobin = col_double(),
##   ..   body.temperature.blood.gas = col_double(),
##   ..   oxygen.saturation = col_double(),
##   ..   partial.oxygen.pressure = col_double(),
##   ..   oxyhemoglobin = col_double(),
##   ..   anion.gap = col_double(),
##   ..   free.calcium = col_double(),
##   ..   total.hemoglobin = col_double(),
##   ..   GCS = col_double(),
##   ..   dischargeDay = col_double(),
##   ..   ageCat = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>
# Preview the first rows of the cleaned data frame
zigong_clean %>% head()

2.3.4 Renaming and Re-labeling the age_cat column

2.3.4.1 Review the levels of the age_cat column before processing

# List the distinct categories currently stored in age_cat
zigong_clean %>%
  pull(age_cat) %>%
  as.factor() %>%
  levels()
## [1] "(21,29]"  "(29,39]"  "(39,49]"  "(49,59]"  "(59,69]"  "(69,79]"  "(79,89]" 
## [8] "(89,110]"

2.3.4.2 Renaming and Re-labeling the age_cat column

# Raw interval categories of age_cat, paired position-by-position with the
# readable labels below. Spelling out `levels` makes the mapping explicit
# instead of relying on the alphabetical order factor() would infer from
# whatever values happen to be present in the data.
age_levels <- c(
  "(21,29]", "(29,39]", "(39,49]", "(49,59]",
  "(59,69]", "(69,79]", "(79,89]", "(89,110]"
)

# Define proper labels for the age_cat column
age_labels <- c("21-29", "30-39", "40-49", "50-59", "60-69", "70-79", "80-89", "90+")

# Fail loudly on an unexpected category rather than silently turning it into NA
stopifnot(all(zigong_clean[["age_cat"]] %in% c(age_levels, NA)))

# Relabel the age_cat column
zigong_clean <- zigong_clean %>%
  mutate(age_cat = factor(age_cat, levels = age_levels, labels = age_labels))

2.3.4.3 Review the levels of the age_cat column after processing

# List the categories of age_cat again to confirm the relabeling
zigong_clean %>%
  pull(age_cat) %>%
  as.factor() %>%
  levels()
## [1] "21-29" "30-39" "40-49" "50-59" "60-69" "70-79" "80-89" "90+"

2.3.4.4 View data frame

# Preview the first rows of the data frame after relabeling age_cat
zigong_clean %>% head()

3 Task 2 : Create Individual Plots

3.1 Visualize and display plots

# Univariate plots p1-p10 of zigong_clean. Categorical columns use bar
# charts, continuous columns use histograms. The objects are reused later
# when the slides are assembled.

# Age Distribution (categorical age groups)
p1 <- ggplot(zigong_clean, aes(x = age_cat)) +
  geom_bar(fill = "#B9DDF1") +
  theme_minimal() +
  labs(title = "Age Group Distribution", x = "Age Group", y = "Count")

# Gender Distribution
p2 <- ggplot(zigong_clean, aes(x = gender)) +
  geom_bar(fill = "#A5CFE9") +
  theme_minimal() +
  labs(title = "Gender Distribution", x = "Gender", y = "Count")

# Height Distribution
# Height is continuous, so bin it with a histogram; geom_bar() would draw one
# bar per distinct value. `bins` is used instead of `binwidth` so the result
# does not depend on the unit the column is stored in.
p3 <- ggplot(zigong_clean, aes(x = height)) +
  geom_histogram(bins = 30, fill = "#9FCAE6") +
  theme_minimal() +
  labs(title = "Height Distribution", x = "Height (m)", y = "Count")

# Weight Distribution
p4 <- ggplot(zigong_clean, aes(x = weight)) +
  geom_histogram(binwidth = 5, fill = "#8DBBDC") +
  theme_minimal() +
  labs(title = "Weight Distribution", x = "Weight (kg)", y = "Count")

# BMI Distribution
p5 <- ggplot(zigong_clean, aes(x = bmi)) +
  geom_histogram(binwidth = 5, fill = "#78A9CE") +
  theme_minimal() +
  labs(title = "BMI Distribution", x = "BMI", y = "Count")

# Type of Heart Failure
p6 <- ggplot(zigong_clean, aes(x = type_of_heart_failure)) +
  geom_bar(fill = "#699AC2") +
  theme_minimal() +
  labs(title = "Type of Heart Failure Distribution", x = "Type of Heart Failure", y = "Count")

# Numeric variable 1: High Sensitivity Troponin
# The x-axis is log10-scaled, so `binwidth = 0.1` is measured in log10 units.
# Non-positive and missing values have no finite logarithm; drop them
# explicitly instead of letting the scale discard them with warnings.
p7 <- zigong_clean %>%
  dplyr::filter(high_sensitivity_troponin > 0) %>%
  ggplot(aes(x = high_sensitivity_troponin)) +
  geom_histogram(binwidth = 0.1, fill = "#5F90BE") +
  scale_x_log10() +
  theme_minimal() +
  labs(title = "High Sensitivity Troponin Distribution", x = "HS Troponin", y = "Count")

# Numeric variable 2: Visit times
# Visit times are counts, so use one bin per unit; a width of 5 would lump
# most observations into a single bar.
p8 <- ggplot(zigong_clean, aes(x = visit_times)) +
  geom_histogram(binwidth = 1, fill = "#4D7EAB") +
  theme_minimal() +
  labs(title = "Visit times Distribution", x = "Visit times", y = "Count")

# Numeric variable 3: Systolic blood pressure
p9 <- ggplot(zigong_clean, aes(x = systolic_blood_pressure)) +
  geom_histogram(binwidth = 5, fill = "#3F6E9A") +
  theme_minimal() +
  labs(title = "Systolic BP Distribution", x = "Systolic BP (mmHg)", y = "Count")

# Numeric variable 4: Diastolic blood pressure
p10 <- ggplot(zigong_clean, aes(x = diastolic_blood_pressure)) +
  geom_histogram(binwidth = 5, fill = "#2E5B88") +
  theme_minimal() +
  labs(title = "Diastolic BP Distribution", x = "Diastolic BP (mmHg)", y = "Count")
# Display plots: evaluating each plot object on its own line auto-prints it,
# which renders the figure in the knitted document (p1 through p10, in order)
p1

p2

p3

p4

p5

p6

p7
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.
## Warning: Removed 130 rows containing non-finite outside the scale range
## (`stat_bin()`).

p8

p9

p10

3.2 Combine the plots above into a slide show

3.2.1 Create the col_dict list and a function to display plots in slides

3.2.1.1 Create column dictionary

# Lookup of column name -> plot type ("distribution" or "count").
# Built from two groups of column names; the element order is
# distribution columns first, then count columns.
col_dict <- local({
  distribution_cols <- c(
    "age_cat", "height", "weight", "bmi",
    "high_sensitivity_troponin",
    "systolic_blood_pressure", "diastolic_blood_pressure"
  )
  count_cols <- c("type_of_heart_failure", "gender", "visit_times")

  plot_types <- c(
    rep("distribution", length(distribution_cols)),
    rep("count", length(count_cols))
  )
  as.list(setNames(plot_types, c(distribution_cols, count_cols)))
})

3.2.1.2 Create function to display plots into slides

#' Build one ggplot for a single column of a data frame
#'
#' @param data Data frame holding `column`; "distribution" plots also read
#'   the `inpatient_number` column from it.
#' @param column Name of the column to plot, as a single string.
#' @param plot_type Either "distribution" (one point per row, plotted against
#'   `inpatient_number`) or "count" (bar chart of category frequencies with
#'   the count printed on each bar).
#' @return A ggplot object.
create_plot <- function(data, column, plot_type) {
  # Validate up front: an unknown type used to surface only at the end of the
  # function as "object 'p' not found".
  valid_types <- c("distribution", "count")
  if (!is.character(plot_type) || length(plot_type) != 1L ||
      !plot_type %in% valid_types) {
    stop(
      "`plot_type` must be one of: ", paste(valid_types, collapse = ", "),
      call. = FALSE
    )
  }

  # Define ID column name
  column_id_unique <- "inpatient_number"

  # "some_column_name" -> "Some Column Name"
  pretty_name <- function(col_name) {
    tools::toTitleCase(gsub("_", " ", col_name))
  }

  title_prefix <- if (plot_type == "distribution") {
    "Distribution of"
  } else {
    "Count of"
  }
  plot_title <- paste(title_prefix, pretty_name(column))

  # Define base theme
  base_theme <- theme(
    plot.title = element_text(face = "bold", hjust = 0.5, size = 12),
    axis.title = element_text(size = 8),
    axis.text = element_text(size = 6),
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 6),
    legend.key.size = unit(0.5, "cm")
  )

  if (plot_type == "distribution") {
    # One point per row: the column's value against the row's ID
    p <- ggplot(
      data,
      aes(x = .data[[column]], y = .data[[column_id_unique]])
    ) +
      geom_point(fill = "#2C7E41", color = "#2C7E41") +
      ggtitle(plot_title) +
      coord_flip() +
      labs(x = pretty_name(column), y = pretty_name(column_id_unique)) +
      theme_minimal() +
      base_theme
  } else {
    # Bar chart of category frequencies. The column is coerced to a factor so
    # that a numeric column is also treated as discrete; mapping a numeric
    # column to `fill` made the count stat drop the aesthetic with a warning.
    p <- ggplot(
      data,
      aes(x = factor(.data[[column]]), fill = factor(.data[[column]]))
    ) +
      geom_bar() +
      # after_stat(count) replaces the deprecated `..count..` notation
      geom_text(stat = "count", aes(label = after_stat(count)), vjust = -0.5) +
      # Single fill scale: a second scale_fill_*() call would simply replace
      # the first one and emit a "Scale for fill is already present" message
      scale_fill_brewer(palette = "Blues") +
      ggtitle(plot_title) +
      coord_flip() +
      labs(
        x = pretty_name(column),
        y = "Count",
        fill = pretty_name(column)
      ) +
      theme_minimal() +
      base_theme +
      # Added after theme_minimal(): a complete theme placed later would
      # overwrite this setting
      theme(axis.text.y = element_text(angle = 0))
  }

  p
}

3.2.2 Display the slides

# Build one plot per entry of col_dict: the entry's name is the column to
# plot and its value is the plot type handed to create_plot()
plot_list <- map(
  names(col_dict),
  function(col_name) {
    create_plot(zigong_clean, col_name, col_dict[[col_name]])
  }
)
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
# Render every plot in the list
plot_list
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]
## Warning: Removed 79 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## 
## [[9]]

## 
## [[10]]
## Warning: The following aesthetics were dropped during statistical transformation: fill.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## Warning: The following aesthetics were dropped during statistical transformation: fill.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

# Lay out every plot in plot_list on one page: a bold title, a subtitle,
# then the plots in a two-column grid.

# Margin added around each sub-plot of the grid
margin_grid <- theme(plot.margin = unit(c(0.5, 1, 1, 0.5), "cm"))

# Title and subtitle text grobs
tg <- textGrob(
  "Health Data Visualisation & Communication",
  gp = gpar(fontsize = 16, fontface = "bold")
)
sg <- textGrob(
  "Visualization Data of Hospitalized patients with heart failure: integrating electronic healthcare records and external outcome data",
  gp = gpar(fontsize = 10)
)

# Extra vertical space reserved below the title and subtitle rows
margin_title <- unit(0.5, "line")

# Title and subtitle rows are sized from their text height plus padding;
# the plot grid takes all remaining height (the "null" unit)
grid.arrange(
  tg,
  sg,
  arrangeGrob(
    grobs = lapply(plot_list, function(single_plot) single_plot + margin_grid),
    ncol = 2
  ),
  heights = unit.c(
    grobHeight(tg) + 1.2 * margin_title,
    grobHeight(sg) + margin_title,
    unit(1, "null")
  )
)
## Warning: Removed 79 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: The following aesthetics were dropped during statistical transformation: fill.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## The following aesthetics were dropped during statistical transformation: fill.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

4 Task 3

4.1 Grouping slides using patchwork lib

To compare and contrast the graphs above, I will group them as follows. Slide 1: Height, Weight, BMI (height and weight are expected to be correlated with BMI). Slide 2: Age, Type of Heart Failure, Visit Times (I want to see whether age has any relation to the type of heart failure). Slide 3: Weight, Type of Heart Failure (I want to see whether weight has any relation to the type of heart failure). Slide 4: HS Troponin, Type of Heart Failure (I want to see whether HS troponin has any relation to the type of heart failure. HS troponin is a biomarker used to detect cardiac injury, including myocardial infarction. Elevated levels of hs-troponin in the blood can indicate damage to the heart muscle. This biomarker is particularly sensitive and can detect even small amounts of cardiac injury). Slide 5: Type of Heart Failure, Systolic Blood Pressure, Diastolic Blood Pressure (blood pressure levels can influence the development, progression, and outcomes of heart failure).

# Assemble the individual plots into side-by-side slides with patchwork;
# every slide is a single row with one column per plot
slide1 <- wrap_plots(p3, p4, p5, ncol = 3)
slide2 <- wrap_plots(p1, p6, p8, ncol = 3)
slide3 <- wrap_plots(p4, p6, ncol = 2)
slide4 <- wrap_plots(p6, p7, ncol = 2)
slide5 <- wrap_plots(p6, p9, p10, ncol = 3)

# Render each slide in turn
slide1

slide2

slide3

slide4
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.
## Warning: Removed 130 rows containing non-finite outside the scale range
## (`stat_bin()`).

slide5

4.1.1 Exporting Plots to PDF slides for Presentation

# Write each slide to its own PDF file (width 10, height 5), using paths
# relative to the current working directory
ggsave(filename = "slide1.pdf", plot = slide1, width = 10, height = 5)
ggsave(filename = "slide2.pdf", plot = slide2, width = 10, height = 5)
ggsave(filename = "slide3.pdf", plot = slide3, width = 10, height = 5)
ggsave(filename = "slide4.pdf", plot = slide4, width = 10, height = 5)
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.
## Warning: Removed 130 rows containing non-finite outside the scale range
## (`stat_bin()`).
ggsave(filename = "slide5.pdf", plot = slide5, width = 10, height = 5)

4.2 Using ggplot2 faceting

# Per-plot data frames with rows removed where the plotted column is missing
# or non-finite.

# age_cat is a labelled factor, so as.numeric() yields its integer level
# codes (1 = first age group, 2 = second, ...), not ages in years. The codes
# are derived only in zigong_clean_age, so zigong_clean keeps the labelled
# factor instead of being overwritten.
zigong_clean_age <- zigong_clean %>%
  filter(!is.na(age_cat)) %>%
  mutate(age_cat = as.numeric(age_cat))

# Filter out non-finite values for each remaining plot
zigong_clean_bmi <- zigong_clean %>%
  filter(is.finite(bmi))
zigong_clean_hs_troponin <- zigong_clean %>%
  filter(is.finite(high_sensitivity_troponin))
zigong_clean_systolic_bp <- zigong_clean %>%
  filter(is.finite(systolic_blood_pressure))
zigong_clean_diastolic_bp <- zigong_clean %>%
  filter(is.finite(diastolic_blood_pressure))

4.2.1 Age Distribution by Type of Heart Failure

# Age-group counts, one panel per type of heart failure.
# age_cat in zigong_clean_age holds the numeric level codes of the age
# groups (1, 2, ...), so draw one bar per code and label the axis with
# age_labels. A 5-wide histogram bin would merge most age groups into a
# single bar.
slide2_1 <- ggplot(zigong_clean_age, aes(x = age_cat)) +
  geom_bar(fill = "#81B1D6") +
  scale_x_continuous(breaks = seq_along(age_labels), labels = age_labels) +
  theme_minimal() +
  labs(title = "Age Distribution by Type of Heart Failure", x = "Age Group", y = "Count") +
  facet_wrap(~ type_of_heart_failure)

# Display graph
slide2_1

4.2.2 BMI Distribution by Type of Heart Failure

# BMI histogram (bins of width 5), one panel per type of heart failure
slide2_2 <- ggplot(
  data = zigong_clean_bmi,
  mapping = aes(x = bmi)
) +
  geom_histogram(fill = "#5F90BE", binwidth = 5) +
  facet_wrap(vars(type_of_heart_failure)) +
  labs(
    title = "BMI Distribution by Type of Heart Failure",
    x = "BMI",
    y = "Count"
  ) +
  theme_minimal()

# Render the faceted plot
slide2_2

4.2.3 HS Troponin Distribution by Type of Heart Failure

# HS troponin histogram on a log10 x-axis, one panel per type of heart failure.
# With scale_x_log10() the bin width is measured in log10 units, so a width
# of 5 would span five orders of magnitude per bin; 0.1 matches the
# unfaceted troponin histogram (p7). Non-positive values have no finite
# logarithm and are dropped explicitly instead of being discarded by the
# scale with warnings.
slide2_3 <- zigong_clean_hs_troponin %>%
  dplyr::filter(high_sensitivity_troponin > 0) %>%
  ggplot(aes(x = high_sensitivity_troponin)) +
  geom_histogram(binwidth = 0.1, fill = "#699AC2") +
  theme_minimal() +
  scale_x_log10() +
  labs(title = "HS Troponin Distribution by Type of Heart Failure", x = "HS Troponin", y = "Count") +
  facet_wrap(~ type_of_heart_failure)

# Display graph
slide2_3
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.
## Warning: Removed 51 rows containing non-finite outside the scale range
## (`stat_bin()`).

4.2.4 Systolic BP Distribution by Type of Heart Failure

# Systolic blood pressure histogram (bins of width 5), one panel per type
# of heart failure
slide2_4 <- ggplot(
  data = zigong_clean_systolic_bp,
  mapping = aes(x = systolic_blood_pressure)
) +
  geom_histogram(fill = "#78A9CE", binwidth = 5) +
  facet_wrap(vars(type_of_heart_failure)) +
  labs(
    title = "Systolic BP Distribution by Type of Heart Failure",
    x = "Systolic BP",
    y = "Count"
  ) +
  theme_minimal()

# Render the faceted plot
slide2_4

4.2.5 Diastolic BP Distribution by Type of Heart Failure

# Diastolic blood pressure histogram (bins of width 5), one panel per type
# of heart failure
slide2_5 <- ggplot(
  data = zigong_clean_diastolic_bp,
  mapping = aes(x = diastolic_blood_pressure)
) +
  geom_histogram(fill = "#4D7EAB", binwidth = 5) +
  facet_wrap(vars(type_of_heart_failure)) +
  labs(
    title = "Diastolic BP Distribution by Type of Heart Failure",
    x = "Diastolic BP",
    y = "Count"
  ) +
  theme_minimal()

# Render the faceted plot
slide2_5